Instructions -- 23May2021

image

External Sources I/O

Github I/O

In [1]:
import os
import pickle
from pathlib import Path
from datetime import datetime
from datetime import date
import random
from sklearn.model_selection import train_test_split



now = datetime.now()
current_time = now.strftime("%H:%M:%S")



# Setup Constants
GIT_NEW_BRANCH_NAME = f'Lab8: {date.today()} at {current_time}'
GIT_DEFAULT_BRANCH_NAME = 'main'
GIT_USER_NAME = 'Shahar Raz'
GIT_USER_EMAIL = 'SRazStudent@gmail.com'
GIT_DEFAULT_MESSAGE = f'{GIT_USER_NAME}, {date.today()} at {current_time}'
GIT_SAVE_DESTINATION = '/content/ComputerVision2021B/Lab Reports/Lab8/'

!git init

def importFilesFromGithub(branch_name = GIT_DEFAULT_BRANCH_NAME):
    """returns the directory in-which we'll save the files"""

    # if not already imported
    if not Path("/content/ComputerVision2021B").exists(): 
        # add the new remote (with id key)
        ! git remote add CV2021Github https://github.com/ShahaRaz/ComputerVision2021B.git
        # create new local directory
        ! mkdir ComputerVision2021B/
        # pulling project to local dir
        ! git --work-tree=/content/ComputerVision2021B/ pull CV2021Github $branch_name;

    return r'/content/ComputerVision2021B/Assignment/Assignment2'




def saveFilesToGithub(message_of_commit = GIT_DEFAULT_MESSAGE,branch_name = GIT_NEW_BRANCH_NAME, user_name = GIT_USER_NAME, user_email = GIT_USER_EMAIL):
    def setLocalID():
        # adding my name
        !git config --global user.name  user_name   #"Shahar  "
        !git config --global user.email user_email  #"SRazStudent@gmail.com"
        #__ changing use name: __
        # !git config --global --unset user.name
        # !git config --global user.name  "Shahar ModifiedName "
                        # list configs
                        # !git config -l

    setLocalID()

    # create new branch
    if branch_name is not GIT_DEFAULT_BRANCH_NAME:
        !git checkout -b $branch_name
        
    # add files to commit
    !git add /content/ComputerVision2021B/*
                    # #checking the status
                    # !git status


    # Commit all Changes
    !git commit -a -m message_of_commit #"delete meee"

# Push to remote
    !git push CV2021Github $branch_name #master


directory = importFilesFromGithub()
Initialized empty Git repository in /content/.git/
remote: Enumerating objects: 233, done.
remote: Counting objects: 100% (233/233), done.
remote: Compressing objects: 100% (206/206), done.
remote: Total 233 (delta 35), reused 118 (delta 5), pack-reused 0
Receiving objects: 100% (233/233), 93.38 MiB | 21.25 MiB/s, done.
Resolving deltas: 100% (35/35), done.
From https://github.com/ShahaRaz/ComputerVision2021B
 * branch            main       -> FETCH_HEAD
 * [new branch]      main       -> CV2021Github/main

Asmnt2:

Imports

In [2]:
from matplotlib import pyplot as plt
import numpy as np

import cv2
from scipy.signal import convolve, convolve2d
import math

from torch.utils.data import Dataset, DataLoader
import torch
import albumentations as A

from torchsummary import summary


SIZE_IMAGES_TO = 224 # 224 - IMAGENET SIZE # 240 - (what was asked in asgmnt paper)

## Summary tensor size:
NUM_OF_DIM = 3 # RGB
SUMMARY_TENSOR_SIZE = (NUM_OF_DIM, SIZE_IMAGES_TO, SIZE_IMAGES_TO)

1.Preprocessing:

1.0 Label Encoding Names-> int

In [3]:
lbl_encoding = [] 

# Instructions: 

# check if label is already in --- --- ---
    # if 'Shahar' in lbl_encoding:

# insert new label --- --- --- --- --- ---
    # lbl_encoding.append("addMe")

# get index of element --- --- --- --- ---
    # lbl_encoding.index('getMyIndex')

# get element from index  --- --- --- --- ---
    # lbl_encoding[3]

1.1 Dataset Class (Paths)

we load paths of photos inorder to keep the photos off the RAM when they're not in use.

Otherwise, we can easly fill up the entire ram (when using large datasets)

In [4]:
class LoadDataFromDisk:

    @staticmethod
    def split_dataset_3way(X, y,validation_size = 0.30 ,test_size = 0.10):
        """Will Split the data to Train, Test and Validation  """
        # for example: 90% to training [= 0.1]
        if test_size > 0:
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = test_size)
        # from that 90%, 10% will go to [ validation = 0.1 ] 
        if validation_size > 0 :
            X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size = validation_size)
        # will leave us with:
        # 10% - Test
        # 81% - Train
        # 9% - Validation
        return X_train, X_validation, X_test, y_train, y_validation, y_test
    

    def __init__(self, path_to_images_folder):
        """input: path to Photos folder, which contains folders with photos
        
            output: array of tuples (image, person_name)
        """
        # For name encoding (1.0)
        label_of_name = -1

        # self.path_to_images_folder = path_to_images_folder
        self.original_imgs = []
        
        # go over folders inside directory 
        for dirpath, dirnames, filenames in os.walk(path_to_images_folder):
            person_name = dirpath.split('/')[-1]

            # skip directory itself
            if person_name is 'Photos':
                continue

            # Adding Pictures Of person_name
            for i in range(0,len(filenames)):

                # Name Encoding:         
                # if name not encoded already
                if not person_name in lbl_encoding:
                    # encode name
                    lbl_encoding.append(person_name)
                   
                # get name's index(code)
                label_of_name = lbl_encoding.index(person_name)

                path_to_image = f'{path_to_images_folder}/{person_name}/{filenames[i]}'

                self.original_imgs.append((path_to_image, int(label_of_name)))


    

    def load_data(self, perc_validation=0.2, perc_test=0.0):

        # Convert to NP.ndarray
        self.original_imgs = np.asarray(self.original_imgs)


        # split X-y 
        X, y = self.original_imgs[:, 0], self.original_imgs[:, 1]

        # Convert to encoding to uint8 (save space (1byte))
        y = y.astype(np.int32) # WORKS WITH uint89
        # # keeping 3way split function ____ for future reference ___
        # X_train, X_valid, X_test, y_train, y_valid, y_test = self.split_dataset_3way\
        #         (X, y, validation_size = perc_validation ,test_size = perc_test)


        # train_test_split shuffles the data SHUFFLING ALL DATA HERE
        X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size = perc_validation,shuffle = True)

        return (X_train, y_train), (X_valid, y_valid)

1.2 Present the images & split

In [5]:
photos_directory = f'{directory}/Photos'
data = LoadDataFromDisk(photos_directory)
(X_train, y_train), (X_valid, y_valid) = data.load_data()
# Sainity check: 
print('train(x,y)  \t  validation(x,y)')
print(len(X_train), len(y_train), '\t\t\t', len(X_valid), len(y_valid))


print(' Printing train data:')
for image, lbl in zip(X_train, y_train):
    plt.imshow(cv2.cvtColor(cv2.imread(image), cv2.COLOR_BGR2RGB))
    plt.title(lbl_encoding[lbl])
    plt.show()
train(x,y)  	  validation(x,y)
31 31 			 8 8
 Printing train data:

1.3 Dataloader From Disk W/ Augmentations

In [6]:
from torch.utils.data import Dataset, DataLoader
import torch
import albumentations as A



class SuperFaceRecognitionDataset(Dataset):
    def __init__(self, x_train, y_train, transforms_album = None, transforms_pytorch = None):
        super (SuperFaceRecognitionDataset, self).__init__()
        self.x_train = x_train
        self.y_train = y_train
        self.transforms_album = transforms_album
        self.transforms_pytorch = transforms_pytorch

    def apply_transforms(self, image):
        # Albumentations Transform
        if self.transforms_album:
            image = self.transforms_album(image=image)["image"]
        
        # Pytorch Transforms
        if self.transforms_pytorch:
            image = self.transforms_pytorch()

        return image


    def __getitem__(self,idx):
        assert isinstance(idx, (int))

        if idx < len(self.x_train) + 1: # (+1 is some sort of bug) TODO: Fixme
            # print(self.x_train[idx])

            img = cv2.imread(self.x_train[idx])
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # img = pil2np(self.x_train[idx])
            label = self.y_train[idx]
            # convert label to tensor
            
            label = torch.tensor(label)

            # apply_transforms(img)
            if self.transforms_album:
                img = self.transforms_album(image=img)["image"]
        
            # TODO: Figure out how to apply.
            # # Pytorch Transforms
            # if self.transforms_pytorch:
            #     img = self.transforms_pytorch(img)


            img = (img.astype(np.float) )/ img.max() # normlize values to [0,1]
            img = torch.tensor(img) #return as tensor object (for future )

            # img = img.float() ## TODO: FIX THIS STUPID BUG
            # label = label.long()
            
            return (img, label)

            ### WHAT IS WRONG?? 
            # # [Channel , H , W] -> Into The Network -> [Batch, Channel, Hight, Width]
            # return (img[np.newaxis, :, :], label) ## TODO : CHECK NEWAXIS DESIGN
        else:
            print(idx)
            return -1


    

    def add_pytorch_transforms(self, transforms_pytorch):
        self.transforms_pytorch = transforms_pytorch
    

    def __len__(self):
        return len(self.x_train)

    def get_lists(self):
        return self.x_train, self.y_train

    def set_lists(self, X, y):
        self.x_train = X
        self.y_train = y
In [7]:
transforms_albumentations = A.Compose([ 
    A.Blur(blur_limit=3, p = 0.3),
    A.GaussNoise(p=0.2),
    A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25 ,p=0.9),
    # A.CLAHE(p=0.3),
    A.RandomBrightness(p=0.6,),
    A.RandomContrast(p=0.3),
    A.RandomGamma(p=0.4),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=10, p = 0.9),

    A.SmallestMaxSize(SIZE_IMAGES_TO+1),
    A.CenterCrop(SIZE_IMAGES_TO,SIZE_IMAGES_TO),
    # A.Normalize(),    
    

])
# Create dataset objects
train_face_dataset = SuperFaceRecognitionDataset(X_train, y_train, transforms_albumentations)
valid_face_dataset = SuperFaceRecognitionDataset(X_valid, y_valid, transforms_albumentations) ## TODO: check if apply transformation on validation set..

plt.imshow(train_face_dataset[2][0])
Out[7]:
<matplotlib.image.AxesImage at 0x7f699ff43c10>
In [8]:
print(type(transforms_albumentations))
<class 'albumentations.core.composition.Compose'>

1.4 Display Data W/ Augmentation

In [9]:
BATCH_SIZE = 8
train_loader = DataLoader(train_face_dataset, batch_size=BATCH_SIZE, shuffle=True,num_workers=2)
valid_loader = DataLoader(valid_face_dataset, batch_size=BATCH_SIZE, shuffle=True,num_workers=2)

for images, labels in train_loader:
    for image, lbl in zip(images, labels):
        plt.imshow(image)
        plt.title(lbl_encoding[lbl])
        print(image.shape)
        plt.show()
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])
torch.Size([224, 224, 3])

1.5 Create OpenCV Model to Idenify faces

In [10]:
# https://towardsdatascience.com/face-detection-in-2-minutes-using-opencv-python-90f89d7c0f81
import cv2

# get the XML file from cv2's git repo
if not Path("/content/haarcascade_frontalface_default.xml").exists(): 
    !wget https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml -L 'haarcascade_frontalface_default.xml'


face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def find_face(image, isPrinting=False):
    image = (image * 255).astype(np.uint8)
    faces = face_cascade.detectMultiScale(image, 1.1, 4)
    if isPrinting is True:
        plt.imshow(image)
        plt.show()
        for (x, y, w, h) in faces:
            cv2.rectangle(image, (x, y), (x+w, y+h), (255, 0, 0), 2)
        plt.imshow(image)
        plt.show()
        
    return faces


img = iter(train_loader).next()[0][0].numpy()
img = (img * 255).astype(np.uint8)

plt.imshow(img)
print(type(img), img.shape)
plt.show()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
plt.imshow(gray, 'gray')
plt.show()
faces = face_cascade.detectMultiScale(img, 1.1, 4)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (115, 0, 255), 1)
# Display the output
plt.imshow(img)
plt.show()
--2021-05-24 08:32:54--  https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 930127 (908K) [text/plain]
Saving to: ‘haarcascade_frontalface_default.xml’

haarcascade_frontal 100%[===================>] 908.33K  --.-KB/s    in 0.1s    

2021-05-24 08:32:54 (6.77 MB/s) - ‘haarcascade_frontalface_default.xml’ saved [930127/930127]

--2021-05-24 08:32:54--  http://haarcascade_frontalface_default.xml/
Resolving haarcascade_frontalface_default.xml (haarcascade_frontalface_default.xml)... failed: Name or service not known.
wget: unable to resolve host address ‘haarcascade_frontalface_default.xml’
FINISHED --2021-05-24 08:32:54--
Total wall clock time: 0.6s
Downloaded: 1 files, 908K in 0.1s (6.77 MB/s)
<class 'numpy.ndarray'> (224, 224, 3)

1.6 Choose Best representing images & explain

  1. Filter bad Photos: As we selected the photos we've choosen them so that will be good for such face detection task, so we didn't filter any photo in this stage.

  2. Augmentations that were not good:

    a. color invert (complitly messed up the images, we looked like white-walkers)

    b. rotation over 10 degrees. it is extreamly uncommon for the eyes to be below the mouth, must of the shots are in range of 10deg of horizontal.

In [11]:
type(train_face_dataset)
Out[11]:
__main__.SuperFaceRecognitionDataset
In [12]:
# for this process we'll check if our face detector detects faces in the image. 
counter = 0
for index, img in enumerate(train_face_dataset):
    # plt.imshow(img[0])
    # plt.show()
    if len(find_face(img[0].numpy())) is 0:

        # face not found in the pic, remove image
        X, y = train_face_dataset.get_lists()
        X = np.delete(X, index)
        y = np.delete(y, index)
        train_face_dataset.set_lists(X, y)

        counter += 1
        print('deleting image at index: ', index)
        

print('removed ', counter, ' elements from data.')

# NOTE! Run find_face() is defined in 2.1, please run this cell first.
deleting image at index:  1
removed  1  elements from data.
In [13]:
print(f'training lenght = {len(train_face_dataset)}\nvalid lenght = {len(valid_face_dataset)}')
training lenght = 30
valid lenght = 8

Asmnt3:

Helper Functions

Save & Load models

In [14]:
# Creating new folder for model saving
! mkdir savedModels/

SAVE_MODELS_DIR = '/content/savedModels'

def save_model(model, optimizer, epoch, loss, path):
  torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, path)
  


def load_model(path, model, optimizer):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']
    return epoch, loss

Setup GPU / cuda

In [15]:
# Check GPU Given

!nvidia-smi

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
Mon May 24 08:32:58 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    33W / 250W |    257MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
+-----------------------------------------------------------------------------+

Metrics of evaluation

In [16]:
!pip install torchmetrics
import torchmetrics 
import torch.nn as nn
Collecting torchmetrics
  Downloading https://files.pythonhosted.org/packages/3b/e8/513cd9d0b1c83dc14cd8f788d05cd6a34758d4fd7e4f9e5ecd5d7d599c95/torchmetrics-0.3.2-py3-none-any.whl (274kB)
     |████████████████████████████████| 276kB 2.9MB/s 
Requirement already satisfied: torch>=1.3.1 in /usr/local/lib/python3.7/dist-packages (from torchmetrics) (1.8.1+cu101)
Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from torchmetrics) (20.9)
Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from torch>=1.3.1->torchmetrics) (1.19.5)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch>=1.3.1->torchmetrics) (3.7.4.3)
Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->torchmetrics) (2.4.7)
Installing collected packages: torchmetrics
Successfully installed torchmetrics-0.3.2
In [17]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

Monitor - TensorBoard

In [18]:
from torch.utils.tensorboard import SummaryWriter

experiment = 1
run = 0


writer_m0_VGG16 = SummaryWriter(f'runs/m0_VGG16_{experiment}')
valid_writer_m0_VGG16 = SummaryWriter(f'runs/m0_VGG16_{experiment}_valid')
# Other 5 models will show at pretrain writer
In [19]:
%load_ext tensorboard 
%reload_ext tensorboard
In [20]:
%tensorboard --logdir=runs

Loss Class

In [21]:
class FL(nn.Module):
    def __init__(self, alpha = 0.2, gamma = 2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma 
        self.CEL = nn.CrossEntropyLoss()
      
    def forward(self, inputs, targets):
        CE_loss = self.CEL(inputs, targets)  #ce = -ln(pt)
        pt = torch.exp(-CE_loss)   #e^-ln(pt) = pt
        F_loss = self.alpha * (1-pt)**self.gamma * CE_loss

        return F_loss

VGG Architecure

Imports

In [22]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import tqdm
import pprint

Constants & Transforms

In [23]:
# BATCH_SIZE = 5 # Defined in 1.4
LEARNING_RATE = 0.01
N_EPOCHS = 30
N_CLASSES = 2 # Shahar / Messi
# VGG_INPUT_IMAGE_240 = SIZE_IMAGES_TO _BLABLABLLAHERE
VGG_INPUT_CHANNELS = 3 # RGB

device = "cuda" if torch.cuda.is_available() else "cpu"

VGG_types = {
    "VGG11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG16": [64,64,"M",128,128,"M",256,256,256,"M",512,512,512,"M",512,512,512,"M",],
    "VGG19": [64,64,"M",128,128,"M",256,256,256,256,"M",512,512,512,512,"M",512,512,512,512,"M",],
}

# transforms_torch = transforms.Compose([
#     transforms.RandomResizedCrop(224),
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],
#                          std  = [ 0.229, 0.224, 0.225 ]),
#     ])

# # print(type(transforms_torch))
# train_loader.dataset.add_pytorch_transforms(transforms_torch)

VGG NN Class

Module Github Repo

In [24]:
class VGG_net(nn.Module):

    def __init__(self, in_channels=3, num_classes=1000):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG_types["VGG16"])

        self.fcs = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        # Fix size..? 
        # if not (x.shape[2] is SIZE_IMAGES_TO) or not (x.shape[3] is SIZE_IMAGES_TO) :
        #     x = torch.reshape(x,[-1,VGG_INPUT_CHANNELS,VGG_INPUT_IMAGE_240,VGG_INPUT_IMAGE_240])
        #     x = x.float()

        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if type(x) == int:
                out_channels = x

                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(x),
                    nn.ReLU(),
                ]
                in_channels = x
            elif x == "M":
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]

        return nn.Sequential(*layers)
In [25]:
# Simple Demo:
demo_model = VGG_net(in_channels=3, num_classes=1000).to(device)
print(demo_model)
N_deleteme = 16 #(Mini batch size)
print('=' * 400)

x = torch.randn(16, 3, SIZE_IMAGES_TO, SIZE_IMAGES_TO).to(device)
x=x.float()
print(type(x))
print(demo_model(x).shape)
VGG_net(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (16): ReLU()
    (17): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (19): ReLU()
    (20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (22): ReLU()
    (23): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (26): ReLU()
    (27): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (28): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (29): ReLU()
    (30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (31): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (32): ReLU()
    (33): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (35): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (36): ReLU()
    (37): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (38): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (39): ReLU()
    (40): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (42): ReLU()
    (43): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (fcs): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)
================================================================================================================================================================================================================================================================================================================================================================================================================
<class 'torch.Tensor'>
torch.Size([16, 1000])
================================================================================================================================================================================================================================================================================================================================================================================================================

Verfy Model - Overfitting

overfitting a small batch:

In [26]:
N_OFITTING_EPOCS = 5000

def over_fit_a_batch(imgs, targets, model, optimizer, epoch, criterion,scheduler):
    # Set model to train mode
    model.train() 

    # Best model result (for saving purposes)
    best_acc = 0

    #rounds in a row in-which we didn't improve
    non_improved_rounds = 0

    # create new Meters
    accuracy = AverageMeter()
    losses = AverageMeter()


    # Fixing Shape
    if not (imgs.shape[2] is SIZE_IMAGES_TO) or not (imgs.shape[3] is SIZE_IMAGES_TO) :
        imgs = torch.reshape(imgs,[-1,VGG_INPUT_CHANNELS,SIZE_IMAGES_TO,SIZE_IMAGES_TO])

    # TODO: Move this .float() part out of this loop...    
    imgs = imgs.float()
    targets = targets.long()

    imgs = imgs.to(device)
    targets = targets.to(device)

    # Predict - Forward 
    output = model(imgs)

    # Calc loss
    loss = criterion(output, targets)

    # setup optimizer
    optimizer.zero_grad()

    # Backward
    loss.backward()
    
    # Step the optimizer
    optimizer.step()

    # Choose single output for each image (highest probability from softmax)
    pred = output.argmax(dim=1 , keepdim=True)
    
    # Calc accuracy
    acc = torchmetrics.functional.accuracy(pred, targets)

    # Update losses & Accuracy logs
    losses.update(loss.item(), imgs.size(0))
    accuracy.update(acc, imgs.size(0))

    # Update Optimizer's Scheduler
    scheduler.step(loss)
    return acc

# Fetch batch
data, targets = next(iter(train_loader))

# Create an instance of the class
validate_VGG16 = VGG_net(in_channels=3, num_classes=N_CLASSES)

# Set calculations to GPU device
validate_VGG16.cuda() # .to(device) ???? TODO : CHECK THE DIFFERENCE...
#_ ALTERNATIVE _
# m1_MobileNetV3 = m1_MobileNetV3.to(device)


# Loss
# criterion = FL() # TODO : CHECK THE DIFFERENCE...
#_ ALTERNATIVE _
criterion = nn.CrossEntropyLoss()


# Optimizer: 
optimizer = torch.optim.Adam(validate_VGG16.parameters(), lr=LEARNING_RATE)

# Scheduler - decrease the learning rate when approaching fine-tuning stage.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)



for epoch in range(N_OFITTING_EPOCS):
    if epoch % 500 is 0:
        print(f'epoch = {epoch}, acc = {over_fit_a_batch(data, targets, validate_VGG16, optimizer, epoch, criterion,scheduler)}')
epoch = 0, acc = 0.5
epoch = 500, acc = 0.625
epoch = 1000, acc = 0.5
epoch = 1500, acc = 0.5
epoch = 2000, acc = 0.5
epoch = 2500, acc = 0.625
epoch = 3000, acc = 0.75
epoch = 3500, acc = 0.375
epoch = 4000, acc = 0.5
epoch = 4500, acc = 0.625

We see that our network is not able to overfit over the data, I've try to change many parameters:

batch size | Learning Rate | Augmentations ...

nothing works.. I must continue anyway due..

In [27]:
DEBUG = False 
TENSORBOARD_EVERY_N_EPOCHS = 1
CLEAN_CACHE_EVERY_N_EPOCHS = 7


def valid(dataloader, model, criterion, optimizer):
    # Set model to test mode
    model.eval()
    acc_loss = 0
    correct = 0
    total = 0
    losses = AverageMeter()
    for I, (imgs, targets) in enumerate(dataloader):

        # Fixing Shape
        if not (imgs.shape[2] is SIZE_IMAGES_TO) or not (imgs.shape[3] is SIZE_IMAGES_TO) :
            imgs = torch.reshape(imgs,[-1,VGG_INPUT_CHANNELS,SIZE_IMAGES_TO,SIZE_IMAGES_TO])
            # Maybe Use permute???
        imgs = imgs.float()
        targets = targets.long()
        imgs = imgs.to(device)
        targets = targets.to(device)
        with torch.no_grad():
            output = model(imgs)
            loss = criterion(output, targets)
        losses.update(loss.item(), imgs.size(0))
        pred = output.argmax(dim = 1, keepdim = True)
        correct += pred.eq(targets.view_as(pred)).sum()
        total += imgs.shape[0]

    accuracy = (100 * (correct / total )).item()


    return losses.avg, accuracy

def train_single_epoch(train_loader, model, optimizer, epoch, criterion, writer,
          validation_writer, name, best_acc, non_improved_rounds = 0,
          scheduler = None,is_running_valid = True):
    
    """
    returns the best accuracy on validation that it got so far.
    """
    # Set model to train mode
    model.train() 

    
    #rounds in a row in-which we didn't improve
    

    # create new Meters
    accuracy = AverageMeter()
    losses = AverageMeter()

    for i, (imgs, targets) in enumerate(train_loader):

        # Fixing Shape
        if not (imgs.shape[2] is SIZE_IMAGES_TO) or not (imgs.shape[3] is SIZE_IMAGES_TO) :
            imgs = torch.reshape(imgs,[-1,VGG_INPUT_CHANNELS,SIZE_IMAGES_TO,SIZE_IMAGES_TO])

        # TODO: Move this .float() part out of this loop...    
        imgs = imgs.float()
        targets = targets.long()

        # Pass data to GPU
        imgs = imgs.to(device)
        targets = targets.to(device)

        # Predict - Forward 
        output = model(imgs)

        # Calc loss
        loss = criterion(output, targets)

        if DEBUG:
            print('output',output.detach().cpu().numpy(),
                    'shape', output.detach().cpu().numpy().shape )

            print('targets',targets.detach().cpu().numpy(),
                    'shape', targets.detach().cpu().numpy().shape )
            

        # setup optimizer
        optimizer.zero_grad()

        # Backward
        loss.backward()
        
        # Step the optimizer
        optimizer.step()
    
        # Choose single output for each image (highest probability from softmax)
        pred = output.argmax(dim=1 , keepdim=True)
        
        # Calc accuracy
        acc = torchmetrics.functional.accuracy(pred, targets)

        # Update losses & Accuracy logs
        losses.update(loss.item(), imgs.size(0))
        accuracy.update(acc, imgs.size(0))


        # Clear cached data:
        del imgs
        del targets
        if epoch % CLEAN_CACHE_EVERY_N_EPOCHS is 0: 
            # imgs.detach()
            # targets.detach()
            torch.cuda.empty_cache()


    # Update Optimizer's Scheduler
    if scheduler:
        scheduler.step(loss)

    # Validate model in current state
   
    valids_loss, valids_acc = valid(valid_loader, model, criterion, optimizer)

    # Write Results To File For TensorBoard Monitorization
    # if epoch % TENSORBOARD_EVERY_N_EPOCHS is 0:
    #     # Write train results
    writer.add_scalar(' average training loss', losses.avg,  epoch)
    writer.add_scalar(' average training accuracy', accuracy.avg,  epoch)
    
    # write valid results
    validation_writer.add_scalar(' average validating loss', valids_loss,  epoch)
    validation_writer.add_scalar(' average validating accuracy', valids_acc,  epoch)

    # save best model's state (only if bigger then 0.6)
    if epoch > 5:
        if best_acc < valids_acc:
            print(f'line120: best_acc = {best_acc}, valids_acc = {valids_acc}')
            best_acc = valids_acc
            %time save_model(model, optimizer, epoch, losses.avg, f'{SAVED_MODEL_PATH}{name}.wow')
            non_improved_rounds = 0
        else:
            non_improved_rounds += 1

    return best_acc, non_improved_rounds
    # elif non_improved_rounds is 3:
    #     load_model()

Train_N_epochs

In [28]:
# Set minimum accuracy for saving the model:
MINIMUM_ACC_4RECORDING = 60.0 
MAX_ROUNDS_NON_IMPROVED = 5


def train_n_epochs(model, n_epochs, train_loader, valid_loader, criterion, optimizer, scheduler, model_name,
                   train_writer_tBoard, valid_writer_tBoard,best_acc):
    path_to_model = f'{SAVED_MODEL_PATH}{model_name}.wow'
    non_improved_rounds = 0 

    for i in tqdm.tqdm(range(n_epochs), total=n_epochs): # Do total N_EPOCHS
        best_acc, non_improved_rounds = train_single_epoch(train_loader, model, optimizer, i, criterion, train_writer_tBoard,
            valid_writer_tBoard, model_name, best_acc, non_improved_rounds, scheduler)
        
        # If saved model once atleast
        if Path(path_to_model).exists():

            # if not improving for 3 epochs, return to best model (lost the way)
            if non_improved_rounds is MAX_ROUNDS_NON_IMPROVED:
                print(f'line19 epoch #{i}, reverting back to model w/ {best_acc} accuracy')
                epoch, loss = load_model(path_to_model, model, optimizer)
                non_improved_rounds = 0

    return best_acc

NN instance & Hyper Parameters

In [29]:
current_model_name = 'VGG16'
SAVED_MODEL_PATH = '/content/savedModels/'
vgg16_path = f'{SAVED_MODEL_PATH}{current_model_name}.wow'

# Create an instance of the class
m0_VGG16 = VGG_net(in_channels=3, num_classes=N_CLASSES)

# Set calculations to GPU device
m0_VGG16.cuda() # .to(device) ???? TODO : CHECK THE DIFFERENCE...
#_ ALTERNATIVE _
# m1_MobileNetV3 = m1_MobileNetV3.to(device)


# Loss
# criterion = FL() # TODO : CHECK THE DIFFERENCE...
#_ ALTERNATIVE _
criterion = nn.CrossEntropyLoss()


# Optimizer: 
optimizer = torch.optim.Adam(m0_VGG16.parameters(), lr=LEARNING_RATE)

# Scheduler - decrease the learning rate when approaching fine-tuning stage.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)



# Check if we haveSAVED_MODEL_PATH already saved model
if Path(vgg16_path).exists():
    epoch, loss = load_model(vgg16_path, m0_VGG16, optimizer)
else:
    epoch = 0

# Check out the summary while the model is training :-)
print(epoch)
summary(m0_VGG16, SUMMARY_TENSOR_SIZE)
0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 224, 224]           1,792
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
            Conv2d-4         [-1, 64, 224, 224]          36,928
       BatchNorm2d-5         [-1, 64, 224, 224]             128
              ReLU-6         [-1, 64, 224, 224]               0
         MaxPool2d-7         [-1, 64, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]          73,856
       BatchNorm2d-9        [-1, 128, 112, 112]             256
             ReLU-10        [-1, 128, 112, 112]               0
           Conv2d-11        [-1, 128, 112, 112]         147,584
      BatchNorm2d-12        [-1, 128, 112, 112]             256
             ReLU-13        [-1, 128, 112, 112]               0
        MaxPool2d-14          [-1, 128, 56, 56]               0
           Conv2d-15          [-1, 256, 56, 56]         295,168
      BatchNorm2d-16          [-1, 256, 56, 56]             512
             ReLU-17          [-1, 256, 56, 56]               0
           Conv2d-18          [-1, 256, 56, 56]         590,080
      BatchNorm2d-19          [-1, 256, 56, 56]             512
             ReLU-20          [-1, 256, 56, 56]               0
           Conv2d-21          [-1, 256, 56, 56]         590,080
      BatchNorm2d-22          [-1, 256, 56, 56]             512
             ReLU-23          [-1, 256, 56, 56]               0
        MaxPool2d-24          [-1, 256, 28, 28]               0
           Conv2d-25          [-1, 512, 28, 28]       1,180,160
      BatchNorm2d-26          [-1, 512, 28, 28]           1,024
             ReLU-27          [-1, 512, 28, 28]               0
           Conv2d-28          [-1, 512, 28, 28]       2,359,808
      BatchNorm2d-29          [-1, 512, 28, 28]           1,024
             ReLU-30          [-1, 512, 28, 28]               0
           Conv2d-31          [-1, 512, 28, 28]       2,359,808
      BatchNorm2d-32          [-1, 512, 28, 28]           1,024
             ReLU-33          [-1, 512, 28, 28]               0
        MaxPool2d-34          [-1, 512, 14, 14]               0
           Conv2d-35          [-1, 512, 14, 14]       2,359,808
      BatchNorm2d-36          [-1, 512, 14, 14]           1,024
             ReLU-37          [-1, 512, 14, 14]               0
           Conv2d-38          [-1, 512, 14, 14]       2,359,808
      BatchNorm2d-39          [-1, 512, 14, 14]           1,024
             ReLU-40          [-1, 512, 14, 14]               0
           Conv2d-41          [-1, 512, 14, 14]       2,359,808
      BatchNorm2d-42          [-1, 512, 14, 14]           1,024
             ReLU-43          [-1, 512, 14, 14]               0
        MaxPool2d-44            [-1, 512, 7, 7]               0
           Linear-45                 [-1, 4096]     102,764,544
             ReLU-46                 [-1, 4096]               0
          Dropout-47                 [-1, 4096]               0
           Linear-48                 [-1, 4096]      16,781,312
             ReLU-49                 [-1, 4096]               0
          Dropout-50                 [-1, 4096]               0
           Linear-51                    [-1, 2]           8,194
================================================================
Total params: 134,277,186
Trainable params: 134,277,186
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 321.94
Params size (MB): 512.23
Estimated Total Size (MB): 834.74
----------------------------------------------------------------
In [30]:
best_acc = MINIMUM_ACC_4RECORDING

train_n_epochs(m0_VGG16, N_EPOCHS - epoch, train_loader, valid_loader, criterion, optimizer, scheduler, current_model_name,
               writer_m0_VGG16, valid_writer_m0_VGG16, best_acc)
 20%|██        | 6/30 [00:08<00:36,  1.51s/it]
line120: best_acc = 60.0, valids_acc = 62.5
 23%|██▎       | 7/30 [00:17<01:27,  3.80s/it]
CPU times: user 1.61 s, sys: 1.61 s, total: 3.22 s
Wall time: 7.64 s
 37%|███▋      | 11/30 [00:25<00:43,  2.31s/it]
line19 epoch #11, reverting back to model w/ 62.5 accuracy
 53%|█████▎    | 16/30 [00:34<00:23,  1.71s/it]
line19 epoch #16, reverting back to model w/ 62.5 accuracy
 70%|███████   | 21/30 [00:42<00:14,  1.61s/it]
line19 epoch #21, reverting back to model w/ 62.5 accuracy
 87%|████████▋ | 26/30 [00:51<00:06,  1.74s/it]
line19 epoch #26, reverting back to model w/ 62.5 accuracy
 93%|█████████▎| 28/30 [00:56<00:03,  1.93s/it]
line120: best_acc = 62.5, valids_acc = 87.5
 97%|█████████▋| 29/30 [01:10<00:05,  5.71s/it]
CPU times: user 1.67 s, sys: 1.62 s, total: 3.29 s
Wall time: 12.3 s
100%|██████████| 30/30 [01:12<00:00,  2.40s/it]
Out[30]:
87.5

Demonstrate Results

Successfull Examples

In [31]:
# 1=1 # STOPP

Bad Examples

Pretrained Models

image

torchVision models

In [32]:
import torchvision.models as models

m1_vgg11 = models.vgg11(pretrained=True)
m2_resnet101 = models.resnet101(pretrained=True)
m3_squeeze_net = models.squeezenet1_1(pretrained=True)
m4_mobilenet_v3_L = models.mobilenet_v3_large(pretrained=True)
m5_mobilenet_v3_S = models.mobilenet_v3_small(pretrained=True)

N_OF_LAST_LAYERS_TO_TRAIN = 20
Downloading: "https://download.pytorch.org/models/vgg11-bbd30ac9.pth" to /root/.cache/torch/hub/checkpoints/vgg11-bbd30ac9.pth

Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /root/.cache/torch/hub/checkpoints/resnet101-5d3b4d8f.pth

Downloading: "https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth" to /root/.cache/torch/hub/checkpoints/squeezenet1_1-f364aa15.pth

Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-8738ca79.pth

Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_small-047dcff4.pth

Fine Tuning & Transfer Learning

Video ref, Start at 2:30

6:50 - freeze layers

In [33]:
class identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x


def freeze_models_n_first_layers(model, nof_layers_to_freeze):
    """
    if we want to train only the 2 last layers call function like:
        freeze_models_n_first_layers(model, count_model_layers(model.parameters) - 2)
    """
    freezed_counter = 0
    for name, param in model.named_parameters():
        if freezed_counter >= nof_layers_to_freeze:
            param.requires_grad = False
        else:
            # Printing the layers that will be trained
            # print(name, param)
            
            # finishing func & loop, stopped freeze
            return

    # for param, limiter in zip(model.parameters(),range(0,nof_layers_to_freeze)):
    #     param.requires_grad = False



def count_model_layers(model):
    counter = 0
    for param in model.parameters():
        counter += 1

    print(counter)
    return counter
    
freeze_models_n_first_layers(m1_vgg11, count_model_layers(m1_vgg11) - N_OF_LAST_LAYERS_TO_TRAIN)


print(m1_vgg11)
22
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU(inplace=True)
    (18): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (19): ReLU(inplace=True)
    (20): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)

TensorBoard Writers

In [34]:
writer_m1_vgg11 = SummaryWriter(f'runs/m1_vgg11_{experiment}')
writer_m2_resnet101 = SummaryWriter(f'runs/m2_resnet101 _{experiment}')
writer_m3_squeeze_net = SummaryWriter(f'runs/m3_Squeeze_net_{experiment}')
writer_m4_mobilenet_v3_L = SummaryWriter(f'runs/m4_mobilenet_v3_L{experiment}')
writer_m5_mobilenet_v3_S = SummaryWriter(f'runs/m5_mobilenet_v3_S_{experiment}')


valid_writer_m1_vgg11 = SummaryWriter(f'runs/m1_vgg11_{experiment}_valid')
valid_writer_m2_resnet101 = SummaryWriter(f'runs/m2_resnet101 _{experiment}_valid')
valid_writer_m3_squeeze_net = SummaryWriter(f'runs/m3_Squeeze_net_{experiment}_valid')
valid_writer_m4_mobilenet_v3_L = SummaryWriter(f'runs/m4_mobilenet_v3_L{experiment}_valid')
valid_writer_m5_mobilenet_v3_S = SummaryWriter(f'runs/m5_mobilenet_v3_S_{experiment}_valid')



#Ensamble: 
writer_model_ensembles = SummaryWriter(f'runs/model_ensembles{experiment}')
valid_writer_model_ensembles  = SummaryWriter(f'runs/model_ensembles_{experiment}_valid')

VGG11

In [35]:
freeze_models_n_first_layers(m2_resnet101, count_model_layers(m2_resnet101) - N_OF_LAST_LAYERS_TO_TRAIN)
314
In [36]:
optimizer = torch.optim.Adam(m1_vgg11.parameters(), lr=LEARNING_RATE)
criterion = FL()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

m1_vgg11 = m1_vgg11.to(device)

if Path(f"{SAVED_MODEL_PATH}m1_vgg11.wow").exists():
    epoch, loss = load_model(f"{SAVED_MODEL_PATH}m1_vgg11.wow", m1_vgg11, optimizer)
else:
    epoch = 0


# Print model's architecture
summary(m1_vgg11, SUMMARY_TENSOR_SIZE)
cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
         MaxPool2d-3         [-1, 64, 112, 112]               0
            Conv2d-4        [-1, 128, 112, 112]          73,856
              ReLU-5        [-1, 128, 112, 112]               0
         MaxPool2d-6          [-1, 128, 56, 56]               0
            Conv2d-7          [-1, 256, 56, 56]         295,168
              ReLU-8          [-1, 256, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         590,080
             ReLU-10          [-1, 256, 56, 56]               0
        MaxPool2d-11          [-1, 256, 28, 28]               0
           Conv2d-12          [-1, 512, 28, 28]       1,180,160
             ReLU-13          [-1, 512, 28, 28]               0
           Conv2d-14          [-1, 512, 28, 28]       2,359,808
             ReLU-15          [-1, 512, 28, 28]               0
        MaxPool2d-16          [-1, 512, 14, 14]               0
           Conv2d-17          [-1, 512, 14, 14]       2,359,808
             ReLU-18          [-1, 512, 14, 14]               0
           Conv2d-19          [-1, 512, 14, 14]       2,359,808
             ReLU-20          [-1, 512, 14, 14]               0
        MaxPool2d-21            [-1, 512, 7, 7]               0
AdaptiveAvgPool2d-22            [-1, 512, 7, 7]               0
           Linear-23                 [-1, 4096]     102,764,544
             ReLU-24                 [-1, 4096]               0
          Dropout-25                 [-1, 4096]               0
           Linear-26                 [-1, 4096]      16,781,312
             ReLU-27                 [-1, 4096]               0
          Dropout-28                 [-1, 4096]               0
           Linear-29                 [-1, 1000]       4,097,000
================================================================
Total params: 132,863,336
Trainable params: 132,863,336
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 125.37
Params size (MB): 506.83
Estimated Total Size (MB): 632.78
----------------------------------------------------------------
In [37]:
# Set minimum accuracy for saving the model:
best_acc = MINIMUM_ACC_4RECORDING

train_n_epochs(m1_vgg11, (N_EPOCHS - epoch), train_loader, valid_loader, criterion, optimizer, scheduler, 'm1_vgg11',
                   writer_m1_vgg11, valid_writer_m1_vgg11, best_acc)
 23%|██▎       | 7/30 [00:09<00:31,  1.38s/it]
line120: best_acc = 60.0, valids_acc = 62.5
 27%|██▋       | 8/30 [00:29<02:33,  6.98s/it]
CPU times: user 1.68 s, sys: 1.42 s, total: 3.1 s
Wall time: 18.8 s
 40%|████      | 12/30 [00:38<00:55,  3.07s/it]
line19 epoch #12, reverting back to model w/ 62.5 accuracy
 57%|█████▋    | 17/30 [00:46<00:22,  1.74s/it]
line19 epoch #17, reverting back to model w/ 62.5 accuracy
 73%|███████▎  | 22/30 [00:54<00:12,  1.54s/it]
line19 epoch #22, reverting back to model w/ 62.5 accuracy
 90%|█████████ | 27/30 [01:02<00:04,  1.38s/it]
line19 epoch #27, reverting back to model w/ 62.5 accuracy
100%|██████████| 30/30 [01:07<00:00,  2.25s/it]
Out[37]:
62.5

ResNet101

In [38]:
freeze_models_n_first_layers(m2_resnet101, count_model_layers(m2_resnet101) - N_OF_LAST_LAYERS_TO_TRAIN)
314
In [39]:
optimizer = torch.optim.Adam(m2_resnet101.parameters(), lr=LEARNING_RATE)
criterion = FL()

m2_resnet101 = m2_resnet101.to(device)

# Get model from file 
if Path(f"{SAVED_MODEL_PATH}m2_resnet101.wow").exists():
    epoch, loss = load_model(f"{SAVED_MODEL_PATH}m2_resnet101.wow", m2_resnet101, optimizer)
else:
    epoch = 0

# Print model's architecture
summary(m2_resnet101, SUMMARY_TENSOR_SIZE)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256, 56, 56]             512
             ReLU-15          [-1, 256, 56, 56]               0
       Bottleneck-16          [-1, 256, 56, 56]               0
           Conv2d-17           [-1, 64, 56, 56]          16,384
      BatchNorm2d-18           [-1, 64, 56, 56]             128
             ReLU-19           [-1, 64, 56, 56]               0
           Conv2d-20           [-1, 64, 56, 56]          36,864
      BatchNorm2d-21           [-1, 64, 56, 56]             128
             ReLU-22           [-1, 64, 56, 56]               0
           Conv2d-23          [-1, 256, 56, 56]          16,384
      BatchNorm2d-24          [-1, 256, 56, 56]             512
             ReLU-25          [-1, 256, 56, 56]               0
       Bottleneck-26          [-1, 256, 56, 56]               0
           Conv2d-27           [-1, 64, 56, 56]          16,384
      BatchNorm2d-28           [-1, 64, 56, 56]             128
             ReLU-29           [-1, 64, 56, 56]               0
           Conv2d-30           [-1, 64, 56, 56]          36,864
      BatchNorm2d-31           [-1, 64, 56, 56]             128
             ReLU-32           [-1, 64, 56, 56]               0
           Conv2d-33          [-1, 256, 56, 56]          16,384
      BatchNorm2d-34          [-1, 256, 56, 56]             512
             ReLU-35          [-1, 256, 56, 56]               0
       Bottleneck-36          [-1, 256, 56, 56]               0
           Conv2d-37          [-1, 128, 56, 56]          32,768
      BatchNorm2d-38          [-1, 128, 56, 56]             256
             ReLU-39          [-1, 128, 56, 56]               0
           Conv2d-40          [-1, 128, 28, 28]         147,456
      BatchNorm2d-41          [-1, 128, 28, 28]             256
             ReLU-42          [-1, 128, 28, 28]               0
           Conv2d-43          [-1, 512, 28, 28]          65,536
      BatchNorm2d-44          [-1, 512, 28, 28]           1,024
           Conv2d-45          [-1, 512, 28, 28]         131,072
      BatchNorm2d-46          [-1, 512, 28, 28]           1,024
             ReLU-47          [-1, 512, 28, 28]               0
       Bottleneck-48          [-1, 512, 28, 28]               0
           Conv2d-49          [-1, 128, 28, 28]          65,536
      BatchNorm2d-50          [-1, 128, 28, 28]             256
             ReLU-51          [-1, 128, 28, 28]               0
           Conv2d-52          [-1, 128, 28, 28]         147,456
      BatchNorm2d-53          [-1, 128, 28, 28]             256
             ReLU-54          [-1, 128, 28, 28]               0
           Conv2d-55          [-1, 512, 28, 28]          65,536
      BatchNorm2d-56          [-1, 512, 28, 28]           1,024
             ReLU-57          [-1, 512, 28, 28]               0
       Bottleneck-58          [-1, 512, 28, 28]               0
           Conv2d-59          [-1, 128, 28, 28]          65,536
      BatchNorm2d-60          [-1, 128, 28, 28]             256
             ReLU-61          [-1, 128, 28, 28]               0
           Conv2d-62          [-1, 128, 28, 28]         147,456
      BatchNorm2d-63          [-1, 128, 28, 28]             256
             ReLU-64          [-1, 128, 28, 28]               0
           Conv2d-65          [-1, 512, 28, 28]          65,536
      BatchNorm2d-66          [-1, 512, 28, 28]           1,024
             ReLU-67          [-1, 512, 28, 28]               0
       Bottleneck-68          [-1, 512, 28, 28]               0
           Conv2d-69          [-1, 128, 28, 28]          65,536
      BatchNorm2d-70          [-1, 128, 28, 28]             256
             ReLU-71          [-1, 128, 28, 28]               0
           Conv2d-72          [-1, 128, 28, 28]         147,456
      BatchNorm2d-73          [-1, 128, 28, 28]             256
             ReLU-74          [-1, 128, 28, 28]               0
           Conv2d-75          [-1, 512, 28, 28]          65,536
      BatchNorm2d-76          [-1, 512, 28, 28]           1,024
             ReLU-77          [-1, 512, 28, 28]               0
       Bottleneck-78          [-1, 512, 28, 28]               0
           Conv2d-79          [-1, 256, 28, 28]         131,072
      BatchNorm2d-80          [-1, 256, 28, 28]             512
             ReLU-81          [-1, 256, 28, 28]               0
           Conv2d-82          [-1, 256, 14, 14]         589,824
      BatchNorm2d-83          [-1, 256, 14, 14]             512
             ReLU-84          [-1, 256, 14, 14]               0
           Conv2d-85         [-1, 1024, 14, 14]         262,144
      BatchNorm2d-86         [-1, 1024, 14, 14]           2,048
           Conv2d-87         [-1, 1024, 14, 14]         524,288
      BatchNorm2d-88         [-1, 1024, 14, 14]           2,048
             ReLU-89         [-1, 1024, 14, 14]               0
       Bottleneck-90         [-1, 1024, 14, 14]               0
           Conv2d-91          [-1, 256, 14, 14]         262,144
      BatchNorm2d-92          [-1, 256, 14, 14]             512
             ReLU-93          [-1, 256, 14, 14]               0
           Conv2d-94          [-1, 256, 14, 14]         589,824
      BatchNorm2d-95          [-1, 256, 14, 14]             512
             ReLU-96          [-1, 256, 14, 14]               0
           Conv2d-97         [-1, 1024, 14, 14]         262,144
      BatchNorm2d-98         [-1, 1024, 14, 14]           2,048
             ReLU-99         [-1, 1024, 14, 14]               0
      Bottleneck-100         [-1, 1024, 14, 14]               0
          Conv2d-101          [-1, 256, 14, 14]         262,144
     BatchNorm2d-102          [-1, 256, 14, 14]             512
            ReLU-103          [-1, 256, 14, 14]               0
          Conv2d-104          [-1, 256, 14, 14]         589,824
     BatchNorm2d-105          [-1, 256, 14, 14]             512
            ReLU-106          [-1, 256, 14, 14]               0
          Conv2d-107         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-108         [-1, 1024, 14, 14]           2,048
            ReLU-109         [-1, 1024, 14, 14]               0
      Bottleneck-110         [-1, 1024, 14, 14]               0
          Conv2d-111          [-1, 256, 14, 14]         262,144
     BatchNorm2d-112          [-1, 256, 14, 14]             512
            ReLU-113          [-1, 256, 14, 14]               0
          Conv2d-114          [-1, 256, 14, 14]         589,824
     BatchNorm2d-115          [-1, 256, 14, 14]             512
            ReLU-116          [-1, 256, 14, 14]               0
          Conv2d-117         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-118         [-1, 1024, 14, 14]           2,048
            ReLU-119         [-1, 1024, 14, 14]               0
      Bottleneck-120         [-1, 1024, 14, 14]               0
          Conv2d-121          [-1, 256, 14, 14]         262,144
     BatchNorm2d-122          [-1, 256, 14, 14]             512
            ReLU-123          [-1, 256, 14, 14]               0
          Conv2d-124          [-1, 256, 14, 14]         589,824
     BatchNorm2d-125          [-1, 256, 14, 14]             512
            ReLU-126          [-1, 256, 14, 14]               0
          Conv2d-127         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-128         [-1, 1024, 14, 14]           2,048
            ReLU-129         [-1, 1024, 14, 14]               0
      Bottleneck-130         [-1, 1024, 14, 14]               0
          Conv2d-131          [-1, 256, 14, 14]         262,144
     BatchNorm2d-132          [-1, 256, 14, 14]             512
            ReLU-133          [-1, 256, 14, 14]               0
          Conv2d-134          [-1, 256, 14, 14]         589,824
     BatchNorm2d-135          [-1, 256, 14, 14]             512
            ReLU-136          [-1, 256, 14, 14]               0
          Conv2d-137         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-138         [-1, 1024, 14, 14]           2,048
            ReLU-139         [-1, 1024, 14, 14]               0
      Bottleneck-140         [-1, 1024, 14, 14]               0
          Conv2d-141          [-1, 256, 14, 14]         262,144
     BatchNorm2d-142          [-1, 256, 14, 14]             512
            ReLU-143          [-1, 256, 14, 14]               0
          Conv2d-144          [-1, 256, 14, 14]         589,824
     BatchNorm2d-145          [-1, 256, 14, 14]             512
            ReLU-146          [-1, 256, 14, 14]               0
          Conv2d-147         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-148         [-1, 1024, 14, 14]           2,048
            ReLU-149         [-1, 1024, 14, 14]               0
      Bottleneck-150         [-1, 1024, 14, 14]               0
          Conv2d-151          [-1, 256, 14, 14]         262,144
     BatchNorm2d-152          [-1, 256, 14, 14]             512
            ReLU-153          [-1, 256, 14, 14]               0
          Conv2d-154          [-1, 256, 14, 14]         589,824
     BatchNorm2d-155          [-1, 256, 14, 14]             512
            ReLU-156          [-1, 256, 14, 14]               0
          Conv2d-157         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-158         [-1, 1024, 14, 14]           2,048
            ReLU-159         [-1, 1024, 14, 14]               0
      Bottleneck-160         [-1, 1024, 14, 14]               0
          Conv2d-161          [-1, 256, 14, 14]         262,144
     BatchNorm2d-162          [-1, 256, 14, 14]             512
            ReLU-163          [-1, 256, 14, 14]               0
          Conv2d-164          [-1, 256, 14, 14]         589,824
     BatchNorm2d-165          [-1, 256, 14, 14]             512
            ReLU-166          [-1, 256, 14, 14]               0
          Conv2d-167         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-168         [-1, 1024, 14, 14]           2,048
            ReLU-169         [-1, 1024, 14, 14]               0
      Bottleneck-170         [-1, 1024, 14, 14]               0
          Conv2d-171          [-1, 256, 14, 14]         262,144
     BatchNorm2d-172          [-1, 256, 14, 14]             512
            ReLU-173          [-1, 256, 14, 14]               0
          Conv2d-174          [-1, 256, 14, 14]         589,824
     BatchNorm2d-175          [-1, 256, 14, 14]             512
            ReLU-176          [-1, 256, 14, 14]               0
          Conv2d-177         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-178         [-1, 1024, 14, 14]           2,048
            ReLU-179         [-1, 1024, 14, 14]               0
      Bottleneck-180         [-1, 1024, 14, 14]               0
          Conv2d-181          [-1, 256, 14, 14]         262,144
     BatchNorm2d-182          [-1, 256, 14, 14]             512
            ReLU-183          [-1, 256, 14, 14]               0
          Conv2d-184          [-1, 256, 14, 14]         589,824
     BatchNorm2d-185          [-1, 256, 14, 14]             512
            ReLU-186          [-1, 256, 14, 14]               0
          Conv2d-187         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-188         [-1, 1024, 14, 14]           2,048
            ReLU-189         [-1, 1024, 14, 14]               0
      Bottleneck-190         [-1, 1024, 14, 14]               0
          Conv2d-191          [-1, 256, 14, 14]         262,144
     BatchNorm2d-192          [-1, 256, 14, 14]             512
            ReLU-193          [-1, 256, 14, 14]               0
          Conv2d-194          [-1, 256, 14, 14]         589,824
     BatchNorm2d-195          [-1, 256, 14, 14]             512
            ReLU-196          [-1, 256, 14, 14]               0
          Conv2d-197         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-198         [-1, 1024, 14, 14]           2,048
            ReLU-199         [-1, 1024, 14, 14]               0
      Bottleneck-200         [-1, 1024, 14, 14]               0
          Conv2d-201          [-1, 256, 14, 14]         262,144
     BatchNorm2d-202          [-1, 256, 14, 14]             512
            ReLU-203          [-1, 256, 14, 14]               0
          Conv2d-204          [-1, 256, 14, 14]         589,824
     BatchNorm2d-205          [-1, 256, 14, 14]             512
            ReLU-206          [-1, 256, 14, 14]               0
          Conv2d-207         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-208         [-1, 1024, 14, 14]           2,048
            ReLU-209         [-1, 1024, 14, 14]               0
      Bottleneck-210         [-1, 1024, 14, 14]               0
          Conv2d-211          [-1, 256, 14, 14]         262,144
     BatchNorm2d-212          [-1, 256, 14, 14]             512
            ReLU-213          [-1, 256, 14, 14]               0
          Conv2d-214          [-1, 256, 14, 14]         589,824
     BatchNorm2d-215          [-1, 256, 14, 14]             512
            ReLU-216          [-1, 256, 14, 14]               0
          Conv2d-217         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-218         [-1, 1024, 14, 14]           2,048
            ReLU-219         [-1, 1024, 14, 14]               0
      Bottleneck-220         [-1, 1024, 14, 14]               0
          Conv2d-221          [-1, 256, 14, 14]         262,144
     BatchNorm2d-222          [-1, 256, 14, 14]             512
            ReLU-223          [-1, 256, 14, 14]               0
          Conv2d-224          [-1, 256, 14, 14]         589,824
     BatchNorm2d-225          [-1, 256, 14, 14]             512
            ReLU-226          [-1, 256, 14, 14]               0
          Conv2d-227         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-228         [-1, 1024, 14, 14]           2,048
            ReLU-229         [-1, 1024, 14, 14]               0
      Bottleneck-230         [-1, 1024, 14, 14]               0
          Conv2d-231          [-1, 256, 14, 14]         262,144
     BatchNorm2d-232          [-1, 256, 14, 14]             512
            ReLU-233          [-1, 256, 14, 14]               0
          Conv2d-234          [-1, 256, 14, 14]         589,824
     BatchNorm2d-235          [-1, 256, 14, 14]             512
            ReLU-236          [-1, 256, 14, 14]               0
          Conv2d-237         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-238         [-1, 1024, 14, 14]           2,048
            ReLU-239         [-1, 1024, 14, 14]               0
      Bottleneck-240         [-1, 1024, 14, 14]               0
          Conv2d-241          [-1, 256, 14, 14]         262,144
     BatchNorm2d-242          [-1, 256, 14, 14]             512
            ReLU-243          [-1, 256, 14, 14]               0
          Conv2d-244          [-1, 256, 14, 14]         589,824
     BatchNorm2d-245          [-1, 256, 14, 14]             512
            ReLU-246          [-1, 256, 14, 14]               0
          Conv2d-247         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-248         [-1, 1024, 14, 14]           2,048
            ReLU-249         [-1, 1024, 14, 14]               0
      Bottleneck-250         [-1, 1024, 14, 14]               0
          Conv2d-251          [-1, 256, 14, 14]         262,144
     BatchNorm2d-252          [-1, 256, 14, 14]             512
            ReLU-253          [-1, 256, 14, 14]               0
          Conv2d-254          [-1, 256, 14, 14]         589,824
     BatchNorm2d-255          [-1, 256, 14, 14]             512
            ReLU-256          [-1, 256, 14, 14]               0
          Conv2d-257         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-258         [-1, 1024, 14, 14]           2,048
            ReLU-259         [-1, 1024, 14, 14]               0
      Bottleneck-260         [-1, 1024, 14, 14]               0
          Conv2d-261          [-1, 256, 14, 14]         262,144
     BatchNorm2d-262          [-1, 256, 14, 14]             512
            ReLU-263          [-1, 256, 14, 14]               0
          Conv2d-264          [-1, 256, 14, 14]         589,824
     BatchNorm2d-265          [-1, 256, 14, 14]             512
            ReLU-266          [-1, 256, 14, 14]               0
          Conv2d-267         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-268         [-1, 1024, 14, 14]           2,048
            ReLU-269         [-1, 1024, 14, 14]               0
      Bottleneck-270         [-1, 1024, 14, 14]               0
          Conv2d-271          [-1, 256, 14, 14]         262,144
     BatchNorm2d-272          [-1, 256, 14, 14]             512
            ReLU-273          [-1, 256, 14, 14]               0
          Conv2d-274          [-1, 256, 14, 14]         589,824
     BatchNorm2d-275          [-1, 256, 14, 14]             512
            ReLU-276          [-1, 256, 14, 14]               0
          Conv2d-277         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-278         [-1, 1024, 14, 14]           2,048
            ReLU-279         [-1, 1024, 14, 14]               0
      Bottleneck-280         [-1, 1024, 14, 14]               0
          Conv2d-281          [-1, 256, 14, 14]         262,144
     BatchNorm2d-282          [-1, 256, 14, 14]             512
            ReLU-283          [-1, 256, 14, 14]               0
          Conv2d-284          [-1, 256, 14, 14]         589,824
     BatchNorm2d-285          [-1, 256, 14, 14]             512
            ReLU-286          [-1, 256, 14, 14]               0
          Conv2d-287         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-288         [-1, 1024, 14, 14]           2,048
            ReLU-289         [-1, 1024, 14, 14]               0
      Bottleneck-290         [-1, 1024, 14, 14]               0
          Conv2d-291          [-1, 256, 14, 14]         262,144
     BatchNorm2d-292          [-1, 256, 14, 14]             512
            ReLU-293          [-1, 256, 14, 14]               0
          Conv2d-294          [-1, 256, 14, 14]         589,824
     BatchNorm2d-295          [-1, 256, 14, 14]             512
            ReLU-296          [-1, 256, 14, 14]               0
          Conv2d-297         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-298         [-1, 1024, 14, 14]           2,048
            ReLU-299         [-1, 1024, 14, 14]               0
      Bottleneck-300         [-1, 1024, 14, 14]               0
          Conv2d-301          [-1, 256, 14, 14]         262,144
     BatchNorm2d-302          [-1, 256, 14, 14]             512
            ReLU-303          [-1, 256, 14, 14]               0
          Conv2d-304          [-1, 256, 14, 14]         589,824
     BatchNorm2d-305          [-1, 256, 14, 14]             512
            ReLU-306          [-1, 256, 14, 14]               0
          Conv2d-307         [-1, 1024, 14, 14]         262,144
     BatchNorm2d-308         [-1, 1024, 14, 14]           2,048
            ReLU-309         [-1, 1024, 14, 14]               0
      Bottleneck-310         [-1, 1024, 14, 14]               0
          Conv2d-311          [-1, 512, 14, 14]         524,288
     BatchNorm2d-312          [-1, 512, 14, 14]           1,024
            ReLU-313          [-1, 512, 14, 14]               0
          Conv2d-314            [-1, 512, 7, 7]       2,359,296
     BatchNorm2d-315            [-1, 512, 7, 7]           1,024
            ReLU-316            [-1, 512, 7, 7]               0
          Conv2d-317           [-1, 2048, 7, 7]       1,048,576
     BatchNorm2d-318           [-1, 2048, 7, 7]           4,096
          Conv2d-319           [-1, 2048, 7, 7]       2,097,152
     BatchNorm2d-320           [-1, 2048, 7, 7]           4,096
            ReLU-321           [-1, 2048, 7, 7]               0
      Bottleneck-322           [-1, 2048, 7, 7]               0
          Conv2d-323            [-1, 512, 7, 7]       1,048,576
     BatchNorm2d-324            [-1, 512, 7, 7]           1,024
            ReLU-325            [-1, 512, 7, 7]               0
          Conv2d-326            [-1, 512, 7, 7]       2,359,296
     BatchNorm2d-327            [-1, 512, 7, 7]           1,024
            ReLU-328            [-1, 512, 7, 7]               0
          Conv2d-329           [-1, 2048, 7, 7]       1,048,576
     BatchNorm2d-330           [-1, 2048, 7, 7]           4,096
            ReLU-331           [-1, 2048, 7, 7]               0
      Bottleneck-332           [-1, 2048, 7, 7]               0
          Conv2d-333            [-1, 512, 7, 7]       1,048,576
     BatchNorm2d-334            [-1, 512, 7, 7]           1,024
            ReLU-335            [-1, 512, 7, 7]               0
          Conv2d-336            [-1, 512, 7, 7]       2,359,296
     BatchNorm2d-337            [-1, 512, 7, 7]           1,024
            ReLU-338            [-1, 512, 7, 7]               0
          Conv2d-339           [-1, 2048, 7, 7]       1,048,576
     BatchNorm2d-340           [-1, 2048, 7, 7]           4,096
            ReLU-341           [-1, 2048, 7, 7]               0
      Bottleneck-342           [-1, 2048, 7, 7]               0
AdaptiveAvgPool2d-343           [-1, 2048, 1, 1]               0
          Linear-344                 [-1, 1000]       2,049,000
================================================================
Total params: 44,549,160
Trainable params: 44,549,160
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 429.73
Params size (MB): 169.94
Estimated Total Size (MB): 600.25
----------------------------------------------------------------
In [40]:
# Set minimum accuracy for saving the model:
best_acc = MINIMUM_ACC_4RECORDING

train_n_epochs(m2_resnet101, N_EPOCHS - epoch, train_loader, valid_loader, criterion, optimizer, scheduler, 'm2_resnet101',
                   writer_m2_resnet101, valid_writer_m2_resnet101, best_acc)
 20%|██        | 6/30 [00:10<00:44,  1.85s/it]
line120: best_acc = 60.0, valids_acc = 62.5
 23%|██▎       | 7/30 [00:14<00:51,  2.24s/it]
CPU times: user 599 ms, sys: 386 ms, total: 984 ms
Wall time: 1.79 s
 37%|███▋      | 11/30 [00:20<00:33,  1.77s/it]
line19 epoch #11, reverting back to model w/ 62.5 accuracy
 53%|█████▎    | 16/30 [00:29<00:23,  1.69s/it]
line19 epoch #16, reverting back to model w/ 62.5 accuracy
 70%|███████   | 21/30 [00:37<00:14,  1.56s/it]
line19 epoch #21, reverting back to model w/ 62.5 accuracy
 83%|████████▎ | 25/30 [00:43<00:07,  1.56s/it]
line120: best_acc = 62.5, valids_acc = 75.0
 87%|████████▋ | 26/30 [00:47<00:08,  2.23s/it]
CPU times: user 599 ms, sys: 475 ms, total: 1.07 s
Wall time: 1.64 s
100%|██████████| 30/30 [00:55<00:00,  1.83s/it]
Out[40]:
75.0

SqueezeNet 1.1

In [41]:
freeze_models_n_first_layers(m3_squeeze_net, count_model_layers(m3_squeeze_net) - N_OF_LAST_LAYERS_TO_TRAIN)
52
In [42]:
optimizer = torch.optim.Adam(m3_squeeze_net.parameters(), lr=LEARNING_RATE)
criterion = FL()


m3_squeeze_net = m3_squeeze_net.to(device)

# Get model from file 
if Path(f"{SAVED_MODEL_PATH}m3_squeeze_net.wow").exists():
    epoch, loss = load_model(f"{SAVED_MODEL_PATH}m3_squeeze_net.wow", m3_squeeze_net, optimizer)
else:
    epoch = 0


# Print model's architecture
summary(m3_squeeze_net, SUMMARY_TENSOR_SIZE)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 111, 111]           1,792
              ReLU-2         [-1, 64, 111, 111]               0
         MaxPool2d-3           [-1, 64, 55, 55]               0
            Conv2d-4           [-1, 16, 55, 55]           1,040
              ReLU-5           [-1, 16, 55, 55]               0
            Conv2d-6           [-1, 64, 55, 55]           1,088
              ReLU-7           [-1, 64, 55, 55]               0
            Conv2d-8           [-1, 64, 55, 55]           9,280
              ReLU-9           [-1, 64, 55, 55]               0
             Fire-10          [-1, 128, 55, 55]               0
           Conv2d-11           [-1, 16, 55, 55]           2,064
             ReLU-12           [-1, 16, 55, 55]               0
           Conv2d-13           [-1, 64, 55, 55]           1,088
             ReLU-14           [-1, 64, 55, 55]               0
           Conv2d-15           [-1, 64, 55, 55]           9,280
             ReLU-16           [-1, 64, 55, 55]               0
             Fire-17          [-1, 128, 55, 55]               0
        MaxPool2d-18          [-1, 128, 27, 27]               0
           Conv2d-19           [-1, 32, 27, 27]           4,128
             ReLU-20           [-1, 32, 27, 27]               0
           Conv2d-21          [-1, 128, 27, 27]           4,224
             ReLU-22          [-1, 128, 27, 27]               0
           Conv2d-23          [-1, 128, 27, 27]          36,992
             ReLU-24          [-1, 128, 27, 27]               0
             Fire-25          [-1, 256, 27, 27]               0
           Conv2d-26           [-1, 32, 27, 27]           8,224
             ReLU-27           [-1, 32, 27, 27]               0
           Conv2d-28          [-1, 128, 27, 27]           4,224
             ReLU-29          [-1, 128, 27, 27]               0
           Conv2d-30          [-1, 128, 27, 27]          36,992
             ReLU-31          [-1, 128, 27, 27]               0
             Fire-32          [-1, 256, 27, 27]               0
        MaxPool2d-33          [-1, 256, 13, 13]               0
           Conv2d-34           [-1, 48, 13, 13]          12,336
             ReLU-35           [-1, 48, 13, 13]               0
           Conv2d-36          [-1, 192, 13, 13]           9,408
             ReLU-37          [-1, 192, 13, 13]               0
           Conv2d-38          [-1, 192, 13, 13]          83,136
             ReLU-39          [-1, 192, 13, 13]               0
             Fire-40          [-1, 384, 13, 13]               0
           Conv2d-41           [-1, 48, 13, 13]          18,480
             ReLU-42           [-1, 48, 13, 13]               0
           Conv2d-43          [-1, 192, 13, 13]           9,408
             ReLU-44          [-1, 192, 13, 13]               0
           Conv2d-45          [-1, 192, 13, 13]          83,136
             ReLU-46          [-1, 192, 13, 13]               0
             Fire-47          [-1, 384, 13, 13]               0
           Conv2d-48           [-1, 64, 13, 13]          24,640
             ReLU-49           [-1, 64, 13, 13]               0
           Conv2d-50          [-1, 256, 13, 13]          16,640
             ReLU-51          [-1, 256, 13, 13]               0
           Conv2d-52          [-1, 256, 13, 13]         147,712
             ReLU-53          [-1, 256, 13, 13]               0
             Fire-54          [-1, 512, 13, 13]               0
           Conv2d-55           [-1, 64, 13, 13]          32,832
             ReLU-56           [-1, 64, 13, 13]               0
           Conv2d-57          [-1, 256, 13, 13]          16,640
             ReLU-58          [-1, 256, 13, 13]               0
           Conv2d-59          [-1, 256, 13, 13]         147,712
             ReLU-60          [-1, 256, 13, 13]               0
             Fire-61          [-1, 512, 13, 13]               0
          Dropout-62          [-1, 512, 13, 13]               0
           Conv2d-63         [-1, 1000, 13, 13]         513,000
             ReLU-64         [-1, 1000, 13, 13]               0
AdaptiveAvgPool2d-65           [-1, 1000, 1, 1]               0
================================================================
Total params: 1,235,496
Trainable params: 1,235,496
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 53.76
Params size (MB): 4.71
Estimated Total Size (MB): 59.05
----------------------------------------------------------------
In [43]:
# Set minimum accuracy for saving the model:
best_acc = MINIMUM_ACC_4RECORDING
train_n_epochs(m3_squeeze_net, N_EPOCHS - epoch, train_loader, valid_loader, criterion, optimizer, scheduler, 'm3_squeeze_net',
                   writer_m3_squeeze_net, valid_writer_m3_squeeze_net, best_acc)
 30%|███       | 9/30 [00:12<00:27,  1.30s/it]
line120: best_acc = 60.0, valids_acc = 62.5
CPU times: user 23.1 ms, sys: 11 ms, total: 34.1 ms
Wall time: 33.9 ms
 47%|████▋     | 14/30 [00:19<00:22,  1.41s/it]
line19 epoch #13, reverting back to model w/ 62.5 accuracy
 63%|██████▎   | 19/30 [00:26<00:14,  1.31s/it]
line19 epoch #18, reverting back to model w/ 62.5 accuracy
 80%|████████  | 24/30 [00:32<00:07,  1.30s/it]
line19 epoch #23, reverting back to model w/ 62.5 accuracy
 97%|█████████▋| 29/30 [00:39<00:01,  1.30s/it]
line19 epoch #28, reverting back to model w/ 62.5 accuracy
100%|██████████| 30/30 [00:40<00:00,  1.34s/it]
Out[43]:
62.5

mobilenet_v3_Large

In [44]:
freeze_models_n_first_layers(m4_mobilenet_v3_L, count_model_layers(m4_mobilenet_v3_L) - N_OF_LAST_LAYERS_TO_TRAIN)
174
In [45]:
optimizer = torch.optim.Adam(m4_mobilenet_v3_L.parameters(), lr=LEARNING_RATE)
criterion = FL()

m4_mobilenet_v3_L = m4_mobilenet_v3_L.to(device)

# Get model from file 
if Path(f"{SAVED_MODEL_PATH}m4_mobilenet_v3_L.wow").exists():
    epoch, loss = load_model(f"{SAVED_MODEL_PATH}m4_mobilenet_v3_L.wow", m4_mobilenet_v3_L, optimizer)
else:
    epoch = 0
    
# Print model's architecture
summary(m4_mobilenet_v3_L, SUMMARY_TENSOR_SIZE)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 16, 112, 112]             432
       BatchNorm2d-2         [-1, 16, 112, 112]              32
         Hardswish-3         [-1, 16, 112, 112]               0
            Conv2d-4         [-1, 16, 112, 112]             144
       BatchNorm2d-5         [-1, 16, 112, 112]              32
              ReLU-6         [-1, 16, 112, 112]               0
            Conv2d-7         [-1, 16, 112, 112]             256
       BatchNorm2d-8         [-1, 16, 112, 112]              32
          Identity-9         [-1, 16, 112, 112]               0
 InvertedResidual-10         [-1, 16, 112, 112]               0
           Conv2d-11         [-1, 64, 112, 112]           1,024
      BatchNorm2d-12         [-1, 64, 112, 112]             128
             ReLU-13         [-1, 64, 112, 112]               0
           Conv2d-14           [-1, 64, 56, 56]             576
      BatchNorm2d-15           [-1, 64, 56, 56]             128
             ReLU-16           [-1, 64, 56, 56]               0
           Conv2d-17           [-1, 24, 56, 56]           1,536
      BatchNorm2d-18           [-1, 24, 56, 56]              48
         Identity-19           [-1, 24, 56, 56]               0
 InvertedResidual-20           [-1, 24, 56, 56]               0
           Conv2d-21           [-1, 72, 56, 56]           1,728
      BatchNorm2d-22           [-1, 72, 56, 56]             144
             ReLU-23           [-1, 72, 56, 56]               0
           Conv2d-24           [-1, 72, 56, 56]             648
      BatchNorm2d-25           [-1, 72, 56, 56]             144
             ReLU-26           [-1, 72, 56, 56]               0
           Conv2d-27           [-1, 24, 56, 56]           1,728
      BatchNorm2d-28           [-1, 24, 56, 56]              48
         Identity-29           [-1, 24, 56, 56]               0
 InvertedResidual-30           [-1, 24, 56, 56]               0
           Conv2d-31           [-1, 72, 56, 56]           1,728
      BatchNorm2d-32           [-1, 72, 56, 56]             144
             ReLU-33           [-1, 72, 56, 56]               0
           Conv2d-34           [-1, 72, 28, 28]           1,800
      BatchNorm2d-35           [-1, 72, 28, 28]             144
             ReLU-36           [-1, 72, 28, 28]               0
           Conv2d-37             [-1, 24, 1, 1]           1,752
             ReLU-38             [-1, 24, 1, 1]               0
           Conv2d-39             [-1, 72, 1, 1]           1,800
SqueezeExcitation-40           [-1, 72, 28, 28]               0
           Conv2d-41           [-1, 40, 28, 28]           2,880
      BatchNorm2d-42           [-1, 40, 28, 28]              80
         Identity-43           [-1, 40, 28, 28]               0
 InvertedResidual-44           [-1, 40, 28, 28]               0
           Conv2d-45          [-1, 120, 28, 28]           4,800
      BatchNorm2d-46          [-1, 120, 28, 28]             240
             ReLU-47          [-1, 120, 28, 28]               0
           Conv2d-48          [-1, 120, 28, 28]           3,000
      BatchNorm2d-49          [-1, 120, 28, 28]             240
             ReLU-50          [-1, 120, 28, 28]               0
           Conv2d-51             [-1, 32, 1, 1]           3,872
             ReLU-52             [-1, 32, 1, 1]               0
           Conv2d-53            [-1, 120, 1, 1]           3,960
SqueezeExcitation-54          [-1, 120, 28, 28]               0
           Conv2d-55           [-1, 40, 28, 28]           4,800
      BatchNorm2d-56           [-1, 40, 28, 28]              80
         Identity-57           [-1, 40, 28, 28]               0
 InvertedResidual-58           [-1, 40, 28, 28]               0
           Conv2d-59          [-1, 120, 28, 28]           4,800
      BatchNorm2d-60          [-1, 120, 28, 28]             240
             ReLU-61          [-1, 120, 28, 28]               0
           Conv2d-62          [-1, 120, 28, 28]           3,000
      BatchNorm2d-63          [-1, 120, 28, 28]             240
             ReLU-64          [-1, 120, 28, 28]               0
           Conv2d-65             [-1, 32, 1, 1]           3,872
             ReLU-66             [-1, 32, 1, 1]               0
           Conv2d-67            [-1, 120, 1, 1]           3,960
SqueezeExcitation-68          [-1, 120, 28, 28]               0
           Conv2d-69           [-1, 40, 28, 28]           4,800
      BatchNorm2d-70           [-1, 40, 28, 28]              80
         Identity-71           [-1, 40, 28, 28]               0
 InvertedResidual-72           [-1, 40, 28, 28]               0
           Conv2d-73          [-1, 240, 28, 28]           9,600
      BatchNorm2d-74          [-1, 240, 28, 28]             480
        Hardswish-75          [-1, 240, 28, 28]               0
           Conv2d-76          [-1, 240, 14, 14]           2,160
      BatchNorm2d-77          [-1, 240, 14, 14]             480
        Hardswish-78          [-1, 240, 14, 14]               0
           Conv2d-79           [-1, 80, 14, 14]          19,200
      BatchNorm2d-80           [-1, 80, 14, 14]             160
         Identity-81           [-1, 80, 14, 14]               0
 InvertedResidual-82           [-1, 80, 14, 14]               0
           Conv2d-83          [-1, 200, 14, 14]          16,000
      BatchNorm2d-84          [-1, 200, 14, 14]             400
        Hardswish-85          [-1, 200, 14, 14]               0
           Conv2d-86          [-1, 200, 14, 14]           1,800
      BatchNorm2d-87          [-1, 200, 14, 14]             400
        Hardswish-88          [-1, 200, 14, 14]               0
           Conv2d-89           [-1, 80, 14, 14]          16,000
      BatchNorm2d-90           [-1, 80, 14, 14]             160
         Identity-91           [-1, 80, 14, 14]               0
 InvertedResidual-92           [-1, 80, 14, 14]               0
           Conv2d-93          [-1, 184, 14, 14]          14,720
      BatchNorm2d-94          [-1, 184, 14, 14]             368
        Hardswish-95          [-1, 184, 14, 14]               0
           Conv2d-96          [-1, 184, 14, 14]           1,656
      BatchNorm2d-97          [-1, 184, 14, 14]             368
        Hardswish-98          [-1, 184, 14, 14]               0
           Conv2d-99           [-1, 80, 14, 14]          14,720
     BatchNorm2d-100           [-1, 80, 14, 14]             160
        Identity-101           [-1, 80, 14, 14]               0
InvertedResidual-102           [-1, 80, 14, 14]               0
          Conv2d-103          [-1, 184, 14, 14]          14,720
     BatchNorm2d-104          [-1, 184, 14, 14]             368
       Hardswish-105          [-1, 184, 14, 14]               0
          Conv2d-106          [-1, 184, 14, 14]           1,656
     BatchNorm2d-107          [-1, 184, 14, 14]             368
       Hardswish-108          [-1, 184, 14, 14]               0
          Conv2d-109           [-1, 80, 14, 14]          14,720
     BatchNorm2d-110           [-1, 80, 14, 14]             160
        Identity-111           [-1, 80, 14, 14]               0
InvertedResidual-112           [-1, 80, 14, 14]               0
          Conv2d-113          [-1, 480, 14, 14]          38,400
     BatchNorm2d-114          [-1, 480, 14, 14]             960
       Hardswish-115          [-1, 480, 14, 14]               0
          Conv2d-116          [-1, 480, 14, 14]           4,320
     BatchNorm2d-117          [-1, 480, 14, 14]             960
       Hardswish-118          [-1, 480, 14, 14]               0
          Conv2d-119            [-1, 120, 1, 1]          57,720
            ReLU-120            [-1, 120, 1, 1]               0
          Conv2d-121            [-1, 480, 1, 1]          58,080
SqueezeExcitation-122          [-1, 480, 14, 14]               0
          Conv2d-123          [-1, 112, 14, 14]          53,760
     BatchNorm2d-124          [-1, 112, 14, 14]             224
        Identity-125          [-1, 112, 14, 14]               0
InvertedResidual-126          [-1, 112, 14, 14]               0
          Conv2d-127          [-1, 672, 14, 14]          75,264
     BatchNorm2d-128          [-1, 672, 14, 14]           1,344
       Hardswish-129          [-1, 672, 14, 14]               0
          Conv2d-130          [-1, 672, 14, 14]           6,048
     BatchNorm2d-131          [-1, 672, 14, 14]           1,344
       Hardswish-132          [-1, 672, 14, 14]               0
          Conv2d-133            [-1, 168, 1, 1]         113,064
            ReLU-134            [-1, 168, 1, 1]               0
          Conv2d-135            [-1, 672, 1, 1]         113,568
SqueezeExcitation-136          [-1, 672, 14, 14]               0
          Conv2d-137          [-1, 112, 14, 14]          75,264
     BatchNorm2d-138          [-1, 112, 14, 14]             224
        Identity-139          [-1, 112, 14, 14]               0
InvertedResidual-140          [-1, 112, 14, 14]               0
          Conv2d-141          [-1, 672, 14, 14]          75,264
     BatchNorm2d-142          [-1, 672, 14, 14]           1,344
       Hardswish-143          [-1, 672, 14, 14]               0
          Conv2d-144            [-1, 672, 7, 7]          16,800
     BatchNorm2d-145            [-1, 672, 7, 7]           1,344
       Hardswish-146            [-1, 672, 7, 7]               0
          Conv2d-147            [-1, 168, 1, 1]         113,064
            ReLU-148            [-1, 168, 1, 1]               0
          Conv2d-149            [-1, 672, 1, 1]         113,568
SqueezeExcitation-150            [-1, 672, 7, 7]               0
          Conv2d-151            [-1, 160, 7, 7]         107,520
     BatchNorm2d-152            [-1, 160, 7, 7]             320
        Identity-153            [-1, 160, 7, 7]               0
InvertedResidual-154            [-1, 160, 7, 7]               0
          Conv2d-155            [-1, 960, 7, 7]         153,600
     BatchNorm2d-156            [-1, 960, 7, 7]           1,920
       Hardswish-157            [-1, 960, 7, 7]               0
          Conv2d-158            [-1, 960, 7, 7]          24,000
     BatchNorm2d-159            [-1, 960, 7, 7]           1,920
       Hardswish-160            [-1, 960, 7, 7]               0
          Conv2d-161            [-1, 240, 1, 1]         230,640
            ReLU-162            [-1, 240, 1, 1]               0
          Conv2d-163            [-1, 960, 1, 1]         231,360
SqueezeExcitation-164            [-1, 960, 7, 7]               0
          Conv2d-165            [-1, 160, 7, 7]         153,600
     BatchNorm2d-166            [-1, 160, 7, 7]             320
        Identity-167            [-1, 160, 7, 7]               0
InvertedResidual-168            [-1, 160, 7, 7]               0
          Conv2d-169            [-1, 960, 7, 7]         153,600
     BatchNorm2d-170            [-1, 960, 7, 7]           1,920
       Hardswish-171            [-1, 960, 7, 7]               0
          Conv2d-172            [-1, 960, 7, 7]          24,000
     BatchNorm2d-173            [-1, 960, 7, 7]           1,920
       Hardswish-174            [-1, 960, 7, 7]               0
          Conv2d-175            [-1, 240, 1, 1]         230,640
            ReLU-176            [-1, 240, 1, 1]               0
          Conv2d-177            [-1, 960, 1, 1]         231,360
SqueezeExcitation-178            [-1, 960, 7, 7]               0
          Conv2d-179            [-1, 160, 7, 7]         153,600
     BatchNorm2d-180            [-1, 160, 7, 7]             320
        Identity-181            [-1, 160, 7, 7]               0
InvertedResidual-182            [-1, 160, 7, 7]               0
          Conv2d-183            [-1, 960, 7, 7]         153,600
     BatchNorm2d-184            [-1, 960, 7, 7]           1,920
       Hardswish-185            [-1, 960, 7, 7]               0
AdaptiveAvgPool2d-186            [-1, 960, 1, 1]               0
          Linear-187                 [-1, 1280]       1,230,080
       Hardswish-188                 [-1, 1280]               0
         Dropout-189                 [-1, 1280]               0
          Linear-190                 [-1, 1000]       1,281,000
================================================================
Total params: 5,483,032
Trainable params: 5,483,032
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 109.74
Params size (MB): 20.92
Estimated Total Size (MB): 131.23
----------------------------------------------------------------
In [46]:
# Set minimum accuracy for saving the model:
best_acc = MINIMUM_ACC_4RECORDING

train_n_epochs(m4_mobilenet_v3_L, N_EPOCHS - epoch, train_loader, valid_loader, criterion, optimizer, scheduler, 'm4_mobilenet_v3_L',
                   writer_m4_mobilenet_v3_L, valid_writer_m4_mobilenet_v3_L, best_acc)
 23%|██▎       | 7/30 [00:09<00:32,  1.43s/it]
line120: best_acc = 60.0, valids_acc = 62.5
CPU times: user 101 ms, sys: 44.1 ms, total: 146 ms
Wall time: 145 ms
 40%|████      | 12/30 [00:17<00:27,  1.52s/it]
line19 epoch #11, reverting back to model w/ 62.5 accuracy
 57%|█████▋    | 17/30 [00:24<00:18,  1.41s/it]
line19 epoch #16, reverting back to model w/ 62.5 accuracy
 73%|███████▎  | 22/30 [00:30<00:10,  1.35s/it]
line19 epoch #21, reverting back to model w/ 62.5 accuracy
 90%|█████████ | 27/30 [00:38<00:04,  1.48s/it]
line19 epoch #26, reverting back to model w/ 62.5 accuracy
100%|██████████| 30/30 [00:42<00:00,  1.43s/it]
Out[46]:
62.5

mobilenet_v3_Small

In [47]:
freeze_models_n_first_layers(m5_mobilenet_v3_S, count_model_layers(m5_mobilenet_v3_S) - N_OF_LAST_LAYERS_TO_TRAIN)
142
In [48]:
optimizer = torch.optim.Adam(m5_mobilenet_v3_S.parameters(), lr=LEARNING_RATE)
criterion = FL()

m5_mobilenet_v3_S = m5_mobilenet_v3_S.to(device)

# Get model from file 
if Path(f"{SAVED_MODEL_PATH}m5_mobilenet_v3_S.wow").exists():
    epoch, loss = load_model(f"{SAVED_MODEL_PATH}m5_mobilenet_v3_S.wow", m5_mobilenet_v3_S, optimizer)
else:
    epoch = 0

# Print model's architecture
summary(m5_mobilenet_v3_S, SUMMARY_TENSOR_SIZE)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 16, 112, 112]             432
       BatchNorm2d-2         [-1, 16, 112, 112]              32
         Hardswish-3         [-1, 16, 112, 112]               0
            Conv2d-4           [-1, 16, 56, 56]             144
       BatchNorm2d-5           [-1, 16, 56, 56]              32
              ReLU-6           [-1, 16, 56, 56]               0
            Conv2d-7              [-1, 8, 1, 1]             136
              ReLU-8              [-1, 8, 1, 1]               0
            Conv2d-9             [-1, 16, 1, 1]             144
SqueezeExcitation-10           [-1, 16, 56, 56]               0
           Conv2d-11           [-1, 16, 56, 56]             256
      BatchNorm2d-12           [-1, 16, 56, 56]              32
         Identity-13           [-1, 16, 56, 56]               0
 InvertedResidual-14           [-1, 16, 56, 56]               0
           Conv2d-15           [-1, 72, 56, 56]           1,152
      BatchNorm2d-16           [-1, 72, 56, 56]             144
             ReLU-17           [-1, 72, 56, 56]               0
           Conv2d-18           [-1, 72, 28, 28]             648
      BatchNorm2d-19           [-1, 72, 28, 28]             144
             ReLU-20           [-1, 72, 28, 28]               0
           Conv2d-21           [-1, 24, 28, 28]           1,728
      BatchNorm2d-22           [-1, 24, 28, 28]              48
         Identity-23           [-1, 24, 28, 28]               0
 InvertedResidual-24           [-1, 24, 28, 28]               0
           Conv2d-25           [-1, 88, 28, 28]           2,112
      BatchNorm2d-26           [-1, 88, 28, 28]             176
             ReLU-27           [-1, 88, 28, 28]               0
           Conv2d-28           [-1, 88, 28, 28]             792
      BatchNorm2d-29           [-1, 88, 28, 28]             176
             ReLU-30           [-1, 88, 28, 28]               0
           Conv2d-31           [-1, 24, 28, 28]           2,112
      BatchNorm2d-32           [-1, 24, 28, 28]              48
         Identity-33           [-1, 24, 28, 28]               0
 InvertedResidual-34           [-1, 24, 28, 28]               0
           Conv2d-35           [-1, 96, 28, 28]           2,304
      BatchNorm2d-36           [-1, 96, 28, 28]             192
        Hardswish-37           [-1, 96, 28, 28]               0
           Conv2d-38           [-1, 96, 14, 14]           2,400
      BatchNorm2d-39           [-1, 96, 14, 14]             192
        Hardswish-40           [-1, 96, 14, 14]               0
           Conv2d-41             [-1, 24, 1, 1]           2,328
             ReLU-42             [-1, 24, 1, 1]               0
           Conv2d-43             [-1, 96, 1, 1]           2,400
SqueezeExcitation-44           [-1, 96, 14, 14]               0
           Conv2d-45           [-1, 40, 14, 14]           3,840
      BatchNorm2d-46           [-1, 40, 14, 14]              80
         Identity-47           [-1, 40, 14, 14]               0
 InvertedResidual-48           [-1, 40, 14, 14]               0
           Conv2d-49          [-1, 240, 14, 14]           9,600
      BatchNorm2d-50          [-1, 240, 14, 14]             480
        Hardswish-51          [-1, 240, 14, 14]               0
           Conv2d-52          [-1, 240, 14, 14]           6,000
      BatchNorm2d-53          [-1, 240, 14, 14]             480
        Hardswish-54          [-1, 240, 14, 14]               0
           Conv2d-55             [-1, 64, 1, 1]          15,424
             ReLU-56             [-1, 64, 1, 1]               0
           Conv2d-57            [-1, 240, 1, 1]          15,600
SqueezeExcitation-58          [-1, 240, 14, 14]               0
           Conv2d-59           [-1, 40, 14, 14]           9,600
      BatchNorm2d-60           [-1, 40, 14, 14]              80
         Identity-61           [-1, 40, 14, 14]               0
 InvertedResidual-62           [-1, 40, 14, 14]               0
           Conv2d-63          [-1, 240, 14, 14]           9,600
      BatchNorm2d-64          [-1, 240, 14, 14]             480
        Hardswish-65          [-1, 240, 14, 14]               0
           Conv2d-66          [-1, 240, 14, 14]           6,000
      BatchNorm2d-67          [-1, 240, 14, 14]             480
        Hardswish-68          [-1, 240, 14, 14]               0
           Conv2d-69             [-1, 64, 1, 1]          15,424
             ReLU-70             [-1, 64, 1, 1]               0
           Conv2d-71            [-1, 240, 1, 1]          15,600
SqueezeExcitation-72          [-1, 240, 14, 14]               0
           Conv2d-73           [-1, 40, 14, 14]           9,600
      BatchNorm2d-74           [-1, 40, 14, 14]              80
         Identity-75           [-1, 40, 14, 14]               0
 InvertedResidual-76           [-1, 40, 14, 14]               0
           Conv2d-77          [-1, 120, 14, 14]           4,800
      BatchNorm2d-78          [-1, 120, 14, 14]             240
        Hardswish-79          [-1, 120, 14, 14]               0
           Conv2d-80          [-1, 120, 14, 14]           3,000
      BatchNorm2d-81          [-1, 120, 14, 14]             240
        Hardswish-82          [-1, 120, 14, 14]               0
           Conv2d-83             [-1, 32, 1, 1]           3,872
             ReLU-84             [-1, 32, 1, 1]               0
           Conv2d-85            [-1, 120, 1, 1]           3,960
SqueezeExcitation-86          [-1, 120, 14, 14]               0
           Conv2d-87           [-1, 48, 14, 14]           5,760
      BatchNorm2d-88           [-1, 48, 14, 14]              96
         Identity-89           [-1, 48, 14, 14]               0
 InvertedResidual-90           [-1, 48, 14, 14]               0
           Conv2d-91          [-1, 144, 14, 14]           6,912
      BatchNorm2d-92          [-1, 144, 14, 14]             288
        Hardswish-93          [-1, 144, 14, 14]               0
           Conv2d-94          [-1, 144, 14, 14]           3,600
      BatchNorm2d-95          [-1, 144, 14, 14]             288
        Hardswish-96          [-1, 144, 14, 14]               0
           Conv2d-97             [-1, 40, 1, 1]           5,800
             ReLU-98             [-1, 40, 1, 1]               0
           Conv2d-99            [-1, 144, 1, 1]           5,904
SqueezeExcitation-100          [-1, 144, 14, 14]               0
          Conv2d-101           [-1, 48, 14, 14]           6,912
     BatchNorm2d-102           [-1, 48, 14, 14]              96
        Identity-103           [-1, 48, 14, 14]               0
InvertedResidual-104           [-1, 48, 14, 14]               0
          Conv2d-105          [-1, 288, 14, 14]          13,824
     BatchNorm2d-106          [-1, 288, 14, 14]             576
       Hardswish-107          [-1, 288, 14, 14]               0
          Conv2d-108            [-1, 288, 7, 7]           7,200
     BatchNorm2d-109            [-1, 288, 7, 7]             576
       Hardswish-110            [-1, 288, 7, 7]               0
          Conv2d-111             [-1, 72, 1, 1]          20,808
            ReLU-112             [-1, 72, 1, 1]               0
          Conv2d-113            [-1, 288, 1, 1]          21,024
SqueezeExcitation-114            [-1, 288, 7, 7]               0
          Conv2d-115             [-1, 96, 7, 7]          27,648
     BatchNorm2d-116             [-1, 96, 7, 7]             192
        Identity-117             [-1, 96, 7, 7]               0
InvertedResidual-118             [-1, 96, 7, 7]               0
          Conv2d-119            [-1, 576, 7, 7]          55,296
     BatchNorm2d-120            [-1, 576, 7, 7]           1,152
       Hardswish-121            [-1, 576, 7, 7]               0
          Conv2d-122            [-1, 576, 7, 7]          14,400
     BatchNorm2d-123            [-1, 576, 7, 7]           1,152
       Hardswish-124            [-1, 576, 7, 7]               0
          Conv2d-125            [-1, 144, 1, 1]          83,088
            ReLU-126            [-1, 144, 1, 1]               0
          Conv2d-127            [-1, 576, 1, 1]          83,520
SqueezeExcitation-128            [-1, 576, 7, 7]               0
          Conv2d-129             [-1, 96, 7, 7]          55,296
     BatchNorm2d-130             [-1, 96, 7, 7]             192
        Identity-131             [-1, 96, 7, 7]               0
InvertedResidual-132             [-1, 96, 7, 7]               0
          Conv2d-133            [-1, 576, 7, 7]          55,296
     BatchNorm2d-134            [-1, 576, 7, 7]           1,152
       Hardswish-135            [-1, 576, 7, 7]               0
          Conv2d-136            [-1, 576, 7, 7]          14,400
     BatchNorm2d-137            [-1, 576, 7, 7]           1,152
       Hardswish-138            [-1, 576, 7, 7]               0
          Conv2d-139            [-1, 144, 1, 1]          83,088
            ReLU-140            [-1, 144, 1, 1]               0
          Conv2d-141            [-1, 576, 1, 1]          83,520
SqueezeExcitation-142            [-1, 576, 7, 7]               0
          Conv2d-143             [-1, 96, 7, 7]          55,296
     BatchNorm2d-144             [-1, 96, 7, 7]             192
        Identity-145             [-1, 96, 7, 7]               0
InvertedResidual-146             [-1, 96, 7, 7]               0
          Conv2d-147            [-1, 576, 7, 7]          55,296
     BatchNorm2d-148            [-1, 576, 7, 7]           1,152
       Hardswish-149            [-1, 576, 7, 7]               0
AdaptiveAvgPool2d-150            [-1, 576, 1, 1]               0
          Linear-151                 [-1, 1024]         590,848
       Hardswish-152                 [-1, 1024]               0
         Dropout-153                 [-1, 1024]               0
          Linear-154                 [-1, 1000]       1,025,000
================================================================
Total params: 2,542,856
Trainable params: 2,542,856
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 35.68
Params size (MB): 9.70
Estimated Total Size (MB): 45.95
----------------------------------------------------------------
In [49]:
# Set minimum accuracy for saving the model:
best_acc = MINIMUM_ACC_4RECORDING

train_n_epochs(m5_mobilenet_v3_S, N_EPOCHS - epoch, train_loader, valid_loader, criterion, optimizer, scheduler, 'm5_mobilenet_v3_S',
                   writer_m5_mobilenet_v3_S, valid_writer_m5_mobilenet_v3_S, best_acc)
 23%|██▎       | 7/30 [00:09<00:32,  1.41s/it]
line120: best_acc = 60.0, valids_acc = 62.5
CPU times: user 53.1 ms, sys: 26 ms, total: 79.2 ms
Wall time: 78.9 ms
 40%|████      | 12/30 [00:16<00:25,  1.43s/it]
line19 epoch #11, reverting back to model w/ 62.5 accuracy
 57%|█████▋    | 17/30 [00:24<00:18,  1.45s/it]
line19 epoch #16, reverting back to model w/ 62.5 accuracy
 73%|███████▎  | 22/30 [00:32<00:13,  1.65s/it]
line19 epoch #21, reverting back to model w/ 62.5 accuracy
 90%|█████████ | 27/30 [00:38<00:04,  1.41s/it]
line19 epoch #26, reverting back to model w/ 62.5 accuracy
100%|██████████| 30/30 [00:42<00:00,  1.43s/it]
Out[49]:
62.5

Model Ensembles

deticated library

Ensemble Class

In [50]:
from torch.nn import functional as F

class MyEnsemble(nn.Module):
    def __init__(self, model_1, model_2, model_3, model_4, model_5, nb_classes):
        super(MyEnsemble,self).__init__()
        self.model_1   = model_1 
        self.model_2   = model_2 
        self.model_3  = model_3  
        self.model_4     = model_4
        self.model_5  = model_5

        #Now Remove the Last layer
        self.model_1    = nn.Identity() 
        self.model_2   = nn.Identity() 
        self.model_3  = nn.Identity() 
        self.model_4     = nn.Identity()
        self.model_5  = nn.Identity() 


        # choosed from the error 
        sum_of_all_models_input_sizes =  752640 # ???? Right???  

        self.classifier = nn.Linear(sum_of_all_models_input_sizes ,nb_classes)
    
    
    def forward(self, x):
        x.clone() # clone to make sure x is not changed by inplace methods
        
        x1 = self.model_1(x)  
        x1 = x1.view(x1.size(0), -1)
        
        x2 = self.model_2(x)
        x2 = x2.view(x2.size(0), -1)
        
        x3 = self.model_3(x)
        x3 = x3.view(x3.size(0), -1)
        
        x4 = self.model_4(x)
        x4 = x4.view(x4.size(0), -1)
        
        x5 = self.model_5(x)
        x5 = x5.view(x5.size(0), -1)
         
        
        #final
        x = torch.cat((x1, x2, x3, x4, x5), dim=1)
        x = self.classifier(F.relu(x))
        
        return x

model_ensembles = MyEnsemble(m1_vgg11,m2_resnet101, m3_squeeze_net, m4_mobilenet_v3_L, m5_mobilenet_v3_S, N_CLASSES)
In [51]:
# Fetch batch
data, targets = next(iter(train_loader))

model_ensembles(data.float())
Out[51]:
tensor([[-0.1349, -0.2880],
        [ 0.0235,  0.0608],
        [-0.0665, -0.3654],
        [ 0.2492, -0.1685],
        [ 0.0523, -0.1198],
        [-0.1219, -0.3119],
        [ 0.3549,  0.0167],
        [ 0.1628, -0.1512]], grad_fn=<AddmmBackward>)

Ensemble Training

In [52]:
optimizer = torch.optim.Adam(model_ensembles.parameters(), lr=LEARNING_RATE)
criterion = FL()

model_ensembles = model_ensembles.to(device)

# Get model from file 
if Path(f"{SAVED_MODEL_PATH}model_ensembles.wow").exists():
    epoch, loss = load_model(f"{SAVED_MODEL_PATH}model_ensembles.wow", model_ensembles, optimizer)
else:
    epoch = 0

summary(model_ensembles, SUMMARY_TENSOR_SIZE)


best_acc = MINIMUM_ACC_4RECORDING

train_n_epochs(model_ensembles, N_EPOCHS - epoch, train_loader, valid_loader, criterion, optimizer, scheduler, 'model_ensembles',
                   writer_model_ensembles, valid_writer_model_ensembles, best_acc)
  0%|          | 0/30 [00:00<?, ?it/s]
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
          Identity-1          [-1, 3, 224, 224]               0
          Identity-2          [-1, 3, 224, 224]               0
          Identity-3          [-1, 3, 224, 224]               0
          Identity-4          [-1, 3, 224, 224]               0
          Identity-5          [-1, 3, 224, 224]               0
            Linear-6                    [-1, 2]       1,505,282
================================================================
Total params: 1,505,282
Trainable params: 1,505,282
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 5.74
Params size (MB): 5.74
Estimated Total Size (MB): 12.06
----------------------------------------------------------------
 27%|██▋       | 8/30 [00:09<00:28,  1.28s/it]
line120: best_acc = 60.0, valids_acc = 62.5
CPU times: user 13.4 ms, sys: 15.3 ms, total: 28.6 ms
Wall time: 28.5 ms
 33%|███▎      | 10/30 [00:12<00:26,  1.33s/it]
line120: best_acc = 62.5, valids_acc = 87.5
CPU times: user 18.8 ms, sys: 19 ms, total: 37.8 ms
Wall time: 39.7 ms
 37%|███▋      | 11/30 [00:13<00:24,  1.27s/it]
line120: best_acc = 87.5, valids_acc = 100.0
CPU times: user 12.8 ms, sys: 20.1 ms, total: 32.9 ms
Wall time: 32.8 ms
 53%|█████▎    | 16/30 [00:20<00:18,  1.29s/it]
line19 epoch #15, reverting back to model w/ 100.0 accuracy
 70%|███████   | 21/30 [00:26<00:11,  1.27s/it]
line19 epoch #20, reverting back to model w/ 100.0 accuracy
 87%|████████▋ | 26/30 [00:34<00:06,  1.59s/it]
line19 epoch #25, reverting back to model w/ 100.0 accuracy
100%|██████████| 30/30 [00:39<00:00,  1.32s/it]
Out[52]:
100.0

Summary

In this assigment we've seen how to:

  1. Build our own costum pytorch neural network
  2. Save pytorch's model and load from disk
  3. Use Tensorboard for monitoring training progress in real time
  4. Download pretrained model, and use them
  5. Build an ensemble of models, which usually is better than those who build it.
  6. When models don't train - try to overfit a small batch
  7. How to fine-tune & Transfer learning a model
  8. Manage GPU memory, by del batches of used info + Clear it's cache
  9. Revert back to best model once acc is not rising after N rounds

Screen Shots

image image

image

23May2021

image image

TensorBoard

image image image

In [53]:
# TRAIN MODEL LOOP
# DEBUG = False 
# # Train the model
# for epoch in range(N_EPOCHS):

#     avg_loss = 0
#     cnt = 0
#     for images, targets in train_loader:
#         images = images.cuda()
#         targets = targets.type(torch.LongTensor)
#         targets = targets.cuda()
       
        
#         # Forward 
#         optimizer.zero_grad()
#         outputs = vgg16(images)

#         # Prints:
#         if DEBUG:
#             print('output',output.detach().cpu().numpy(),
#                     'shape', output.detach().cpu().numpy().shape )

#             print('target',target.detach().cpu().numpy(),
#                     'shape', target.detach().cpu().numpy().shape )
            
#         # Loss 
#         loss = cost(outputs, targets)
#         avg_loss += loss.data
#         cnt += 1
#         print("[E: %d] loss: %f, avg_loss: %f" % (epoch, loss.data, avg_loss/cnt))

#         # Backward 
#         loss.backward()
#         # Optimize
#         optimizer.step()


#     scheduler.step(avg_loss)


#     # for images, targets in train_loader:
#     # for i in range(batch_size):
#     #     plt.imshow(images[i])
#     #     plt.title(targets[i])
#     #     plt.show()

# # https://stackoverflow.com/questions/55762581/expected-object-of-scalar-type-long-but-got-scalar-type-byte-for-argument-2-ta
In [54]:
# # Test The Model

# # Set model to evaluation mode 
# vgg16.eval()

# # counters:
# correct = 0
# total = 0

# for images, labels in valid_loader:
#     images = images.cuda()
#     outputs = vgg16(images)
#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted.cpu() == labels).sum()
#     print(predicted, labels, correct, total)
#     print("avg acc: %f" % (100* correct/total))